diff options
author | Sa Liu <saliu@de.ibm.com> | 2007-07-13 18:31:08 +0000 |
---|---|---|
committer | Ulrich Weigand <uweigand@gcc.gnu.org> | 2007-07-13 18:31:08 +0000 |
commit | 39aeae8573ed2085fbfad05f3e8ba1456fcb6d44 (patch) | |
tree | 5426797cdb67dde5f5c7640c88b2c18b729ddd9b /gcc/config/spu/spu.c | |
parent | 2826df069f786fb321bb60525340fffaa1f22b6b (diff) | |
download | gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.zip gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.gz gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.bz2 |
config.gcc: Add options for arch and tune on SPU.
2007-07-13 Sa Liu <saliu@de.ibm.com>
* config.gcc: Add options for arch and tune on SPU.
* config/spu/predicates.md: Add constant operands 0 and 1.
* config/spu/spu-builtins.def: Add builtins for double precision
floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt,
si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1,
spu_testsv.
* config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with
a CELLEDP target.
* config/spu/spu-protos.h: Add new function prototypes.
* config/spu/spu.c (spu_override_options): Check options -march and
-mtune.
(spu_comp_icode): Add comparison code for DFmode and vector mode.
(spu_emit_branch_or_set): Use the new code for DFmode and vector
mode comparison.
(spu_const_from_ints): New. Create a vector constant from 4 ints.
(get_vec_cmp_insn): New. Get insn index of vector compare instruction.
(spu_emit_vector_compare): New. Emit vector compare.
(spu_emit_vector_cond_expr): New. Emit vector conditional expression.
* config/spu/spu.h: Add options -march and -mtune. Define processor
types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro
CANONICALIZE_COMPARISON.
* config/spu/spu.md: Add new insns for double precision compare
and double precision vector compare. Add vcond and smax/smin patterns
to enable DFmode vector conditional expression.
* config/spu/spu.opt: Add options -march and -mtune.
* config/spu/spu_internals.h: Add builtins for CELLEDP target:
si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for
both CELL and CELLEDP targets: spu_testsv.
* config/spu/spu_intrinsics.h: Add flag mnemonics for test special
values.
testsuite/
* gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test
for V2DFmode vector conditional expression.
* gcc.target/spu/dfcmeq.c: New. Test combination of abs
and dfceq patterns.
* gcc.target/spu/dfcmgt.c: New. Test combination of abs
and dfcgt patterns.
* gcc.target/spu/intrinsics-2.c: New. Test intrinsics for
V2DFmode comparison and test special values.
* lib/target-supports.exp: Switch on test for V2DFmode
vector conditional expression.
From-SVN: r126626
Diffstat (limited to 'gcc/config/spu/spu.c')
-rw-r--r-- | gcc/config/spu/spu.c | 307 |
1 files changed, 287 insertions, 20 deletions
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c index f963268..e283d87 100644 --- a/gcc/config/spu/spu.c +++ b/gcc/config/spu/spu.c @@ -95,6 +95,8 @@ static void emit_nop_for_insn (rtx insn); static bool insn_clobbers_hbr (rtx insn); static void spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance); +static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1, + enum machine_mode dmode); static rtx get_branch_target (rtx branch); static void insert_branch_hints (void); static void insert_nops (void); @@ -138,6 +140,11 @@ static int spu_builtin_vectorization_cost (bool); extern const char *reg_names[]; rtx spu_compare_op0, spu_compare_op1; +/* Which instruction set architecture to use. */ +int spu_arch; +/* Which cpu are we tuning for. */ +int spu_tune; + enum spu_immediate { SPU_NONE, SPU_IL, @@ -298,6 +305,28 @@ spu_override_options (void) if (spu_fixed_range_string) fix_range (spu_fixed_range_string); + + /* Determine processor architectural level. */ + if (spu_arch_string) + { + if (strcmp (&spu_arch_string[0], "cell") == 0) + spu_arch = PROCESSOR_CELL; + else if (strcmp (&spu_arch_string[0], "celledp") == 0) + spu_arch = PROCESSOR_CELLEDP; + else + error ("Unknown architecture '%s'", &spu_arch_string[0]); + } + + /* Determine processor to tune for. 
*/ + if (spu_tune_string) + { + if (strcmp (&spu_tune_string[0], "cell") == 0) + spu_tune = PROCESSOR_CELL; + else if (strcmp (&spu_tune_string[0], "celledp") == 0) + spu_tune = PROCESSOR_CELLEDP; + else + error ("Unknown architecture '%s'", &spu_tune_string[0]); + } } /* Handle an attribute requiring a FUNCTION_DECL; arguments as in @@ -646,16 +675,19 @@ spu_expand_block_move (rtx ops[]) enum spu_comp_code { SPU_EQ, SPU_GT, SPU_GTU }; - -int spu_comp_icode[8][3] = { - {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, - {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, - {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, - {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, - {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, - {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, - {0, 0, 0}, - {CODE_FOR_ceq_vec, 0, 0}, +int spu_comp_icode[12][3] = { + {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, + {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, + {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, + {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, + {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, + {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, + {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0}, + {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi}, + {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi}, + {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si}, + {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0}, + {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0}, }; /* Generate a compare for CODE. 
Return a brand-new rtx that represents @@ -786,13 +818,26 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) index = 6; break; case V16QImode: + index = 7; + comp_mode = op_mode; + break; case V8HImode: + index = 8; + comp_mode = op_mode; + break; case V4SImode: - case V2DImode: + index = 9; + comp_mode = op_mode; + break; case V4SFmode: + index = 10; + comp_mode = V4SImode; + break; case V2DFmode: - index = 7; + index = 11; + comp_mode = V2DImode; break; + case V2DImode: default: abort (); } @@ -800,16 +845,19 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) if (GET_MODE (spu_compare_op1) == DFmode) { rtx reg = gen_reg_rtx (DFmode); - if (!flag_unsafe_math_optimizations + if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) || (scode != SPU_GT && scode != SPU_EQ)) abort (); - if (reverse_compare) - emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0)); - else - emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1)); - reverse_compare = 0; - spu_compare_op0 = reg; - spu_compare_op1 = CONST0_RTX (DFmode); + if (spu_arch == PROCESSOR_CELL) + { + if (reverse_compare) + emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0)); + else + emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1)); + reverse_compare = 0; + spu_compare_op0 = reg; + spu_compare_op1 = CONST0_RTX (DFmode); + } } if (is_set == 0 && spu_compare_op1 == const0_rtx @@ -1884,6 +1932,30 @@ spu_const (enum machine_mode mode, HOST_WIDE_INT val) size.) */ int spu_hint_dist = (8 * 4); +/* Create a MODE vector constant from 4 ints. 
*/ +rtx +spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d) +{ + unsigned char arr[16]; + arr[0] = (a >> 24) & 0xff; + arr[1] = (a >> 16) & 0xff; + arr[2] = (a >> 8) & 0xff; + arr[3] = (a >> 0) & 0xff; + arr[4] = (b >> 24) & 0xff; + arr[5] = (b >> 16) & 0xff; + arr[6] = (b >> 8) & 0xff; + arr[7] = (b >> 0) & 0xff; + arr[8] = (c >> 24) & 0xff; + arr[9] = (c >> 16) & 0xff; + arr[10] = (c >> 8) & 0xff; + arr[11] = (c >> 0) & 0xff; + arr[12] = (d >> 24) & 0xff; + arr[13] = (d >> 16) & 0xff; + arr[14] = (d >> 8) & 0xff; + arr[15] = (d >> 0) & 0xff; + return array_to_constant(mode, arr); +} + /* An array of these is used to propagate hints to predecessor blocks. */ struct spu_bb_info { @@ -4857,6 +4929,201 @@ spu_expand_vector_init (rtx target, rtx vals) } } +/* Return insn index for the vector compare instruction for given CODE, + and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */ + +static int +get_vec_cmp_insn (enum rtx_code code, + enum machine_mode dest_mode, + enum machine_mode op_mode) + +{ + switch (code) + { + case EQ: + if (dest_mode == V16QImode && op_mode == V16QImode) + return CODE_FOR_ceq_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_ceq_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_ceq_v4si; + if (dest_mode == V4SImode && op_mode == V4SFmode) + return CODE_FOR_ceq_v4sf; + if (dest_mode == V2DImode && op_mode == V2DFmode) + return CODE_FOR_ceq_v2df; + break; + case GT: + if (dest_mode == V16QImode && op_mode == V16QImode) + return CODE_FOR_cgt_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_cgt_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_cgt_v4si; + if (dest_mode == V4SImode && op_mode == V4SFmode) + return CODE_FOR_cgt_v4sf; + if (dest_mode == V2DImode && op_mode == V2DFmode) + return CODE_FOR_cgt_v2df; + break; + case GTU: + if (dest_mode == V16QImode && op_mode == V16QImode) + return 
CODE_FOR_clgt_v16qi; + if (dest_mode == V8HImode && op_mode == V8HImode) + return CODE_FOR_clgt_v8hi; + if (dest_mode == V4SImode && op_mode == V4SImode) + return CODE_FOR_clgt_v4si; + break; + default: + break; + } + return -1; +} + +/* Emit vector compare for operands OP0 and OP1 using code RCODE. + DMODE is expected destination mode. This is a recursive function. */ + +static rtx +spu_emit_vector_compare (enum rtx_code rcode, + rtx op0, rtx op1, + enum machine_mode dmode) +{ + int vec_cmp_insn; + rtx mask; + enum machine_mode dest_mode; + enum machine_mode op_mode = GET_MODE (op1); + + gcc_assert (GET_MODE (op0) == GET_MODE (op1)); + + /* Floating point vector compare instructions uses destination V4SImode. + Double floating point vector compare instructions uses destination V2DImode. + Move destination to appropriate mode later. */ + if (dmode == V4SFmode) + dest_mode = V4SImode; + else if (dmode == V2DFmode) + dest_mode = V2DImode; + else + dest_mode = dmode; + + mask = gen_reg_rtx (dest_mode); + vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); + + if (vec_cmp_insn == -1) + { + bool swap_operands = false; + bool try_again = false; + switch (rcode) + { + case LT: + rcode = GT; + swap_operands = true; + try_again = true; + break; + case LTU: + rcode = GTU; + swap_operands = true; + try_again = true; + break; + case NE: + /* Treat A != B as ~(A==B). 
*/ + { + enum insn_code nor_code; + rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode); + nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code; + gcc_assert (nor_code != CODE_FOR_nothing); + emit_insn (GEN_FCN (nor_code) (mask, eq_rtx)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; + } + break; + case GE: + case GEU: + case LE: + case LEU: + /* Try GT/GTU/LT/LTU OR EQ */ + { + rtx c_rtx, eq_rtx; + enum insn_code ior_code; + enum rtx_code new_code; + + switch (rcode) + { + case GE: new_code = GT; break; + case GEU: new_code = GTU; break; + case LE: new_code = LT; break; + case LEU: new_code = LTU; break; + default: + gcc_unreachable (); + } + + c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode); + eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode); + + ior_code = ior_optab->handlers[(int)dest_mode].insn_code; + gcc_assert (ior_code != CODE_FOR_nothing); + emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; + } + break; + default: + gcc_unreachable (); + } + + /* You only get two chances. */ + if (try_again) + vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); + + gcc_assert (vec_cmp_insn != -1); + + if (swap_operands) + { + rtx tmp; + tmp = op0; + op0 = op1; + op1 = tmp; + } + } + + emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1)); + if (dmode != dest_mode) + { + rtx temp = gen_reg_rtx (dest_mode); + convert_move (temp, mask, 0); + return temp; + } + return mask; +} + + +/* Emit vector conditional expression. + DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. + CC_OP0 and CC_OP1 are the two operands for the relation operation COND. 
*/ + +int +spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, + rtx cond, rtx cc_op0, rtx cc_op1) +{ + enum machine_mode dest_mode = GET_MODE (dest); + enum rtx_code rcode = GET_CODE (cond); + rtx mask; + + /* Get the vector mask for the given relational operations. */ + mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode); + + emit_insn(gen_selb (dest, op2, op1, mask)); + + return 1; +} + static rtx spu_force_reg (enum machine_mode mode, rtx op) { |