aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/spu/spu.c
diff options
context:
space:
mode:
authorSa Liu <saliu@de.ibm.com>2007-07-13 18:31:08 +0000
committerUlrich Weigand <uweigand@gcc.gnu.org>2007-07-13 18:31:08 +0000
commit39aeae8573ed2085fbfad05f3e8ba1456fcb6d44 (patch)
tree5426797cdb67dde5f5c7640c88b2c18b729ddd9b /gcc/config/spu/spu.c
parent2826df069f786fb321bb60525340fffaa1f22b6b (diff)
downloadgcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.zip
gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.gz
gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.bz2
config.gcc: Add options for arch and tune on SPU.
2007-07-13 Sa Liu <saliu@de.ibm.com> * config.gcc: Add options for arch and tune on SPU. * config/spu/predicates.md: Add constant operands 0 and 1. * config/spu/spu-builtins.def: Add builtins for double precision floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1, spu_testsv. * config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with a CELLEDP target. * config/spu/spu-protos.h: Add new function prototypes. * config/spu/spu.c (spu_override_options): Check options -march and -mtune. (spu_comp_icode): Add comparison code for DFmode and vector mode. (spu_emit_branch_or_set): Use the new code for DFmode and vector mode comparison. (spu_const_from_int): New. Create a vector constant from 4 ints. (get_vec_cmp_insn): New. Get insn index of vector compare instruction. (spu_emit_vector_compare): New. Emit vector compare. (spu_emit_vector_cond_expr): New. Emit vector conditional expression. * config/spu/spu.h: Add options -march and -mtune. Define processor types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro CANONICALIZE_COMPARISON. * config/spu/spu.md: Add new insns for double precision compare and double precision vector compare. Add vcond and smax/smin patterns to enable DFmode vector conditional expression. * config/spu/spu.opt: Add options -march and -mtune. * config/spu/spu_internals.h: Add builtins for CELLEDP target: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for both CELL and CELLEDP targets: spu_testsv. * config/spu/spu_intrinsics.h: Add flag mnemonics for test special values. testsuite/ * gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test for V2DFmode vector conditional expression. * gcc.target/spu/dfcmeq.c: New. Test combination of abs and dfceq patterns. * gcc.target/spu/dfcmgt.c: New. Test combination of abs and dfcgt patterns. * gcc.target/spu/intrinsics-2.c: New. Test intrinsics for V2DFmode comparison and test special values. 
* lib/target-supports.exp: Switch on test for V2DFmode vector conditional expression. From-SVN: r126626
Diffstat (limited to 'gcc/config/spu/spu.c')
-rw-r--r--gcc/config/spu/spu.c307
1 file changed, 287 insertions, 20 deletions
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index f963268..e283d87 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -95,6 +95,8 @@ static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
int distance);
+static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
+ enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
@@ -138,6 +140,11 @@ static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
+/* Which instruction set architecture to use. */
+int spu_arch;
+/* Which cpu are we tuning for. */
+int spu_tune;
+
enum spu_immediate {
SPU_NONE,
SPU_IL,
@@ -298,6 +305,28 @@ spu_override_options (void)
if (spu_fixed_range_string)
fix_range (spu_fixed_range_string);
+
+ /* Determine processor architectural level. */
+ if (spu_arch_string)
+ {
+ if (strcmp (&spu_arch_string[0], "cell") == 0)
+ spu_arch = PROCESSOR_CELL;
+ else if (strcmp (&spu_arch_string[0], "celledp") == 0)
+ spu_arch = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_arch_string[0]);
+ }
+
+ /* Determine processor to tune for. */
+ if (spu_tune_string)
+ {
+ if (strcmp (&spu_tune_string[0], "cell") == 0)
+ spu_tune = PROCESSOR_CELL;
+ else if (strcmp (&spu_tune_string[0], "celledp") == 0)
+ spu_tune = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_tune_string[0]);
+ }
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
@@ -646,16 +675,19 @@ spu_expand_block_move (rtx ops[])
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };
-
-int spu_comp_icode[8][3] = {
- {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
- {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
- {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
- {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
- {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
- {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
- {0, 0, 0},
- {CODE_FOR_ceq_vec, 0, 0},
+/* Compare-instruction icodes.  Rows are indexed by operand mode in the
+   order QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF
+   (matching the index values chosen in spu_emit_branch_or_set);
+   columns are indexed by enum spu_comp_code (SPU_EQ, SPU_GT, SPU_GTU).
+   A zero entry means no instruction exists for that mode/comparison
+   combination (e.g. no unsigned compare for the FP modes).  */
+int spu_comp_icode[12][3] = {
+ {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
+ {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
+ {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
+ {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
+ {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
+ {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
+ {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
+ {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
+ {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
+ {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
+ {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
+ {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE. Return a brand-new rtx that represents
@@ -786,13 +818,26 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
index = 6;
break;
case V16QImode:
+ index = 7;
+ comp_mode = op_mode;
+ break;
case V8HImode:
+ index = 8;
+ comp_mode = op_mode;
+ break;
case V4SImode:
- case V2DImode:
+ index = 9;
+ comp_mode = op_mode;
+ break;
case V4SFmode:
+ index = 10;
+ comp_mode = V4SImode;
+ break;
case V2DFmode:
- index = 7;
+ index = 11;
+ comp_mode = V2DImode;
break;
+ case V2DImode:
default:
abort ();
}
@@ -800,16 +845,19 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
if (GET_MODE (spu_compare_op1) == DFmode)
{
rtx reg = gen_reg_rtx (DFmode);
- if (!flag_unsafe_math_optimizations
+ if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|| (scode != SPU_GT && scode != SPU_EQ))
abort ();
- if (reverse_compare)
- emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
- else
- emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
- reverse_compare = 0;
- spu_compare_op0 = reg;
- spu_compare_op1 = CONST0_RTX (DFmode);
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ if (reverse_compare)
+ emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
+ else
+ emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
+ reverse_compare = 0;
+ spu_compare_op0 = reg;
+ spu_compare_op1 = CONST0_RTX (DFmode);
+ }
}
if (is_set == 0 && spu_compare_op1 == const0_rtx
@@ -1884,6 +1932,30 @@ spu_const (enum machine_mode mode, HOST_WIDE_INT val)
size.) */
int spu_hint_dist = (8 * 4);
+/* Create a MODE vector constant from 4 ints.  A, B, C and D supply
+   the four 32-bit words of the 16-byte constant; each word is split
+   into bytes most-significant first, so the array is laid out in
+   big-endian order before being handed to array_to_constant.  */
+rtx
+spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
+{
+ unsigned char arr[16];
+ arr[0] = (a >> 24) & 0xff;
+ arr[1] = (a >> 16) & 0xff;
+ arr[2] = (a >> 8) & 0xff;
+ arr[3] = (a >> 0) & 0xff;
+ arr[4] = (b >> 24) & 0xff;
+ arr[5] = (b >> 16) & 0xff;
+ arr[6] = (b >> 8) & 0xff;
+ arr[7] = (b >> 0) & 0xff;
+ arr[8] = (c >> 24) & 0xff;
+ arr[9] = (c >> 16) & 0xff;
+ arr[10] = (c >> 8) & 0xff;
+ arr[11] = (c >> 0) & 0xff;
+ arr[12] = (d >> 24) & 0xff;
+ arr[13] = (d >> 16) & 0xff;
+ arr[14] = (d >> 8) & 0xff;
+ arr[15] = (d >> 0) & 0xff;
+ return array_to_constant(mode, arr);
+}
+
+
/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
@@ -4857,6 +4929,201 @@ spu_expand_vector_init (rtx target, rtx vals)
}
}
+/* Return the icode of the vector compare instruction implementing CODE
+   for operands of OP_MODE with a result of DEST_MODE, or -1 if no such
+   instruction is available.  Only EQ, GT and GTU map directly to
+   hardware compares; FP compares (V4SF/V2DF operands) produce an
+   integer mask of the same width (V4SI/V2DI), and no unsigned (GTU)
+   compare exists for the FP modes.  */
+
+static int
+get_vec_cmp_insn (enum rtx_code code,
+ enum machine_mode dest_mode,
+ enum machine_mode op_mode)
+
+{
+ switch (code)
+ {
+ case EQ:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_ceq_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_ceq_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_ceq_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_ceq_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_ceq_v2df;
+ break;
+ case GT:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_cgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_cgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_cgt_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_cgt_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_cgt_v2df;
+ break;
+ case GTU:
+ /* Unsigned compare exists only for the integer vector modes.  */
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_clgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_clgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_clgt_v4si;
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* Emit a vector compare of operands OP0 and OP1 using code RCODE and
+   return the mask register.  DMODE is the expected destination mode.
+   Comparisons with no direct hardware instruction are synthesized
+   recursively: LT/LTU by swapping operands of GT/GTU, NE as ~(A==B),
+   and GE/GEU/LE/LEU as (GT/GTU/LT/LTU) OR EQ.  */
+
+static rtx
+spu_emit_vector_compare (enum rtx_code rcode,
+ rtx op0, rtx op1,
+ enum machine_mode dmode)
+{
+ int vec_cmp_insn;
+ rtx mask;
+ enum machine_mode dest_mode;
+ enum machine_mode op_mode = GET_MODE (op1);
+
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+ /* Floating point vector compare instructions use a V4SImode
+ destination; double precision vector compares use V2DImode.
+ The result is moved to the requested mode afterwards. */
+ if (dmode == V4SFmode)
+ dest_mode = V4SImode;
+ else if (dmode == V2DFmode)
+ dest_mode = V2DImode;
+ else
+ dest_mode = dmode;
+
+ mask = gen_reg_rtx (dest_mode);
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ /* No direct instruction: synthesize the compare from the ones that
+ do exist.  */
+ if (vec_cmp_insn == -1)
+ {
+ bool swap_operands = false;
+ bool try_again = false;
+ switch (rcode)
+ {
+ case LT:
+ rcode = GT;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case LTU:
+ rcode = GTU;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case NE:
+ /* Treat A != B as ~(A==B).  The recursive call passes
+ dest_mode (the integer mask mode) so the sub-compare needs
+ no further conversion.  */
+ {
+ enum insn_code nor_code;
+ rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+ nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (nor_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* Try GT/GTU/LT/LTU OR EQ */
+ {
+ rtx c_rtx, eq_rtx;
+ enum insn_code ior_code;
+ enum rtx_code new_code;
+
+ switch (rcode)
+ {
+ case GE: new_code = GT; break;
+ case GEU: new_code = GTU; break;
+ case LE: new_code = LT; break;
+ case LEU: new_code = LTU; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
+ eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+
+ ior_code = ior_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (ior_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* You only get two chances. */
+ if (try_again)
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ gcc_assert (vec_cmp_insn != -1);
+
+ if (swap_operands)
+ {
+ rtx tmp;
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+ }
+
+ emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
+ /* NOTE(review): at all three conversion sites here, temp is allocated
+ in dest_mode rather than dmode, so when dmode != dest_mode the
+ convert_move is a same-mode copy and the caller still receives a
+ dest_mode value — looks like gen_reg_rtx (dmode) was intended;
+ verify against the selb consumer.  */
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+}
+
+
+/* Emit a vector conditional expression (dest = cond ? op1 : op2).
+ DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR
+ operands. CC_OP0 and CC_OP1 are the two operands of the relational
+ operation COND. Always returns 1 (success). */
+
+int
+spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum rtx_code rcode = GET_CODE (cond);
+ rtx mask;
+
+ /* Get the vector mask for the given relational operations. */
+ mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
+
+ /* NOTE(review): selb is presumed to select bits from its second value
+ operand where the mask bits are 1, hence op2 before op1 so that
+ mask-true lanes take op1 — confirm against the SPU ISA / selb
+ pattern in spu.md.  */
+ emit_insn(gen_selb (dest, op2, op1, mask));
+
+ return 1;
+}
+
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{