author     Sa Liu <saliu@de.ibm.com>                2007-07-13 18:31:08 +0000
committer  Ulrich Weigand <uweigand@gcc.gnu.org>    2007-07-13 18:31:08 +0000
commit     39aeae8573ed2085fbfad05f3e8ba1456fcb6d44 (patch)
tree       5426797cdb67dde5f5c7640c88b2c18b729ddd9b /gcc/config/spu
parent     2826df069f786fb321bb60525340fffaa1f22b6b (diff)
download   gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.zip
           gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.gz
           gcc-39aeae8573ed2085fbfad05f3e8ba1456fcb6d44.tar.bz2
config.gcc: Add options for arch and tune on SPU.
2007-07-13  Sa Liu  <saliu@de.ibm.com>

	* config.gcc: Add options for arch and tune on SPU.
	* config/spu/predicates.md: Add constant operands 0 and 1.
	* config/spu/spu-builtins.def: Add builtins for double precision
	floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt,
	si_dfcmgt, si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13,
	spu_cmpabsgt_1, spu_testsv.
	* config/spu/spu-c.c: Define __SPU_EDP__ when compiling for a
	CELLEDP target.
	* config/spu/spu-protos.h: Add new function prototypes.
	* config/spu/spu.c (spu_override_options): Check options -march
	and -mtune.
	(spu_comp_icode): Add comparison code for DFmode and vector modes.
	(spu_emit_branch_or_set): Use the new code for DFmode and vector
	mode comparison.
	(spu_const_from_ints): New.  Create a vector constant from 4 ints.
	(get_vec_cmp_insn): New.  Get insn index of vector compare
	instruction.
	(spu_emit_vector_compare): New.  Emit vector compare.
	(spu_emit_vector_cond_expr): New.  Emit vector conditional
	expression.
	* config/spu/spu.h: Add options -march and -mtune.  Define
	processor types PROCESSOR_CELL and PROCESSOR_CELLEDP.  Define
	macro CANONICALIZE_COMPARISON.
	* config/spu/spu.md: Add new insns for double precision compare
	and double precision vector compare.  Add vcond and smax/smin
	patterns to enable DFmode vector conditional expression.
	* config/spu/spu.opt: Add options -march and -mtune.
	* config/spu/spu_internals.h: Add builtins for CELLEDP target:
	si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv.  Add builtin
	for both CELL and CELLEDP targets: spu_testsv.
	* config/spu/spu_intrinsics.h: Add flag mnemonics for testing
	special values.

testsuite/
	* gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test for
	V2DFmode vector conditional expression.
	* gcc.target/spu/dfcmeq.c: New.  Test combination of abs and
	dfceq patterns.
	* gcc.target/spu/dfcmgt.c: New.  Test combination of abs and
	dfcgt patterns.
	* gcc.target/spu/intrinsics-2.c: New.  Test intrinsics for
	V2DFmode comparison and test special values.
	* lib/target-supports.exp: Switch on test for V2DFmode vector
	conditional expression.

From-SVN: r126626
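As a usage illustration (not part of the commit), the patch lets the generic
intrinsics accept double-precision vectors; with -march=celledp spu_cmpgt maps
to a single dfcgt, while on plain Cell it expands to the integer emulation
added in spu.md below. The scaffolding here is assumed; the intrinsic names
are the ones this patch wires up:

    #include <spu_intrinsics.h>

    /* Element-wise a > b: all-ones in each 64-bit lane where true.  */
    vector unsigned long long
    gt_mask (vector double a, vector double b)
    {
      return spu_cmpgt (a, b);
    }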
Diffstat (limited to 'gcc/config/spu')
-rw-r--r--  gcc/config/spu/predicates.md     |   9
-rw-r--r--  gcc/config/spu/spu-builtins.def  |  16
-rw-r--r--  gcc/config/spu/spu-c.c           |   2
-rw-r--r--  gcc/config/spu/spu-protos.h      |   3
-rw-r--r--  gcc/config/spu/spu.c             | 307
-rw-r--r--  gcc/config/spu/spu.h             |  30
-rw-r--r--  gcc/config/spu/spu.md            | 662
-rw-r--r--  gcc/config/spu/spu.opt           |   8
-rw-r--r--  gcc/config/spu/spu_internals.h   |  10
-rw-r--r--  gcc/config/spu/spu_intrinsics.h  |  10
10 files changed, 996 insertions, 61 deletions
diff --git a/gcc/config/spu/predicates.md b/gcc/config/spu/predicates.md
index 8b31e65..74659c3 100644
--- a/gcc/config/spu/predicates.md
+++ b/gcc/config/spu/predicates.md
@@ -16,6 +16,15 @@
;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
;; 02110-1301, USA.
+;; Return 1 if operand is constant zero of its mode
+(define_predicate "const_zero_operand"
+ (and (match_code "const_int,const,const_double,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+(define_predicate "const_one_operand"
+ (and (match_code "const_int,const,const_double,const_vector")
+ (match_test "op == CONST1_RTX (mode)")))
+
(define_predicate "spu_reg_operand"
(and (match_operand 0 "register_operand")
(ior (not (match_code "subreg"))
diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
index 6ae382f..9ab1b5d 100644
--- a/gcc/config/spu/spu-builtins.def
+++ b/gcc/config/spu/spu-builtins.def
@@ -189,9 +189,14 @@ DEF_BUILTIN (SI_CFLTU, CODE_FOR_spu_cfltu, "si_cfltu", B_INSN,
DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_U14))
DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID))
@@ -245,11 +250,10 @@ DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN,
DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
-DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
+DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7))
/* definitions to support overloaded generic builtin functions: */
@@ -339,6 +343,10 @@ DEF_BUILTIN (SPU_CMPEQ_9, CODE_FOR_ceq_v8hi, "spu_cmpeq_9",
DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
+DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
@@ -353,6 +361,10 @@ DEF_BUILTIN (SPU_CMPGT_9, CODE_FOR_clgt_v8hi, "spu_cmpgt_9",
DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
+DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID))
DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI))
DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI))
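The suffixed B_INTERNAL entries are the per-type instances behind each
B_OVERLOAD builtin; overload resolution picks the instance whose argument
types match. A hedged sketch of how the new V2DF instances surface
(variable declarations assumed):

    vector float  vf_a, vf_b;
    vector double vd_a, vd_b;
    /* Resolves to spu_cmpabseq_0 (cmeq_v4sf); result type UV4SI.  */
    vector unsigned int       m0 = spu_cmpabseq (vf_a, vf_b);
    /* Resolves to spu_cmpabseq_1 (cmeq_v2df); result type UV2DI.  */
    vector unsigned long long m1 = spu_cmpabseq (vd_a, vd_b);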
diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c
index 43d5d16..56ddefb 100644
--- a/gcc/config/spu/spu-c.c
+++ b/gcc/config/spu/spu-c.c
@@ -138,6 +138,8 @@ spu_cpu_cpp_builtins (struct cpp_reader *pfile)
builtin_define_std ("__SPU__");
cpp_assert (pfile, "cpu=spu");
cpp_assert (pfile, "machine=spu");
+ if (spu_arch == PROCESSOR_CELLEDP)
+ builtin_define_std ("__SPU_EDP__");
builtin_define_std ("__vector=__attribute__((__spu_vector__))");
}
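__SPU_EDP__ gives user code a feature-test macro for the extended
double-precision ISA. An illustrative guard (mirroring the one
spu_internals.h gains below; ra and rb are assumed qword values):

    #ifdef __SPU_EDP__
      qword m = si_dfcgt (ra, rb);   /* a single dfcgt instruction */
    #else
      /* plain Cell: rely on the compiler's software sequences */
    #endif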
diff --git a/gcc/config/spu/spu-protos.h b/gcc/config/spu/spu-protos.h
index 4caaf1b..d069536 100644
--- a/gcc/config/spu/spu-protos.h
+++ b/gcc/config/spu/spu-protos.h
@@ -32,6 +32,7 @@ extern void spu_expand_insv (rtx * ops);
extern int spu_expand_block_move (rtx * ops);
extern void spu_emit_branch_or_set (int is_set, enum rtx_code code,
rtx * operands);
+extern int spu_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
extern HOST_WIDE_INT const_double_to_hwint (rtx x);
extern rtx hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v);
extern void print_operand_address (FILE * file, register rtx addr);
@@ -43,6 +44,8 @@ extern void spu_expand_prologue (void);
extern void spu_expand_epilogue (unsigned char sibcall_p);
extern rtx spu_return_addr (int count, rtx frame);
extern rtx spu_const (enum machine_mode mode, HOST_WIDE_INT val);
+extern rtx spu_const_from_ints (enum machine_mode mode,
+ int a, int b, int c, int d);
extern struct rtx_def *spu_float_const (const char *string,
enum machine_mode mode);
extern int immediate_load_p (rtx op, enum machine_mode mode);
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index f963268..e283d87 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -95,6 +95,8 @@ static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
int distance);
+static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
+ enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
@@ -138,6 +140,11 @@ static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
+/* Which instruction set architecture to use. */
+int spu_arch;
+/* Which cpu are we tuning for. */
+int spu_tune;
+
enum spu_immediate {
SPU_NONE,
SPU_IL,
@@ -298,6 +305,28 @@ spu_override_options (void)
if (spu_fixed_range_string)
fix_range (spu_fixed_range_string);
+
+ /* Determine processor architectural level. */
+ if (spu_arch_string)
+ {
+ if (strcmp (&spu_arch_string[0], "cell") == 0)
+ spu_arch = PROCESSOR_CELL;
+ else if (strcmp (&spu_arch_string[0], "celledp") == 0)
+ spu_arch = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_arch_string[0]);
+ }
+
+ /* Determine processor to tune for. */
+ if (spu_tune_string)
+ {
+ if (strcmp (&spu_tune_string[0], "cell") == 0)
+ spu_tune = PROCESSOR_CELL;
+ else if (strcmp (&spu_tune_string[0], "celledp") == 0)
+ spu_tune = PROCESSOR_CELLEDP;
+ else
+ error ("Unknown architecture '%s'", &spu_tune_string[0]);
+ }
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
@@ -646,16 +675,19 @@ spu_expand_block_move (rtx ops[])
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };
-
-int spu_comp_icode[8][3] = {
- {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
- {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
- {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
- {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
- {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
- {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
- {0, 0, 0},
- {CODE_FOR_ceq_vec, 0, 0},
+int spu_comp_icode[12][3] = {
+ {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
+ {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
+ {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
+ {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
+ {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
+ {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
+ {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
+ {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
+ {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
+ {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
+ {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
+ {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE. Return a brand-new rtx that represents
@@ -786,13 +818,26 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
index = 6;
break;
case V16QImode:
+ index = 7;
+ comp_mode = op_mode;
+ break;
case V8HImode:
+ index = 8;
+ comp_mode = op_mode;
+ break;
case V4SImode:
- case V2DImode:
+ index = 9;
+ comp_mode = op_mode;
+ break;
case V4SFmode:
+ index = 10;
+ comp_mode = V4SImode;
+ break;
case V2DFmode:
- index = 7;
+ index = 11;
+ comp_mode = V2DImode;
break;
+ case V2DImode:
default:
abort ();
}
@@ -800,16 +845,19 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
if (GET_MODE (spu_compare_op1) == DFmode)
{
rtx reg = gen_reg_rtx (DFmode);
- if (!flag_unsafe_math_optimizations
+ if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|| (scode != SPU_GT && scode != SPU_EQ))
abort ();
- if (reverse_compare)
- emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
- else
- emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
- reverse_compare = 0;
- spu_compare_op0 = reg;
- spu_compare_op1 = CONST0_RTX (DFmode);
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ if (reverse_compare)
+ emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
+ else
+ emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
+ reverse_compare = 0;
+ spu_compare_op0 = reg;
+ spu_compare_op1 = CONST0_RTX (DFmode);
+ }
}
if (is_set == 0 && spu_compare_op1 == const0_rtx
@@ -1884,6 +1932,30 @@ spu_const (enum machine_mode mode, HOST_WIDE_INT val)
size.) */
int spu_hint_dist = (8 * 4);
+/* Create a MODE vector constant from 4 ints. */
+rtx
+spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
+{
+ unsigned char arr[16];
+ arr[0] = (a >> 24) & 0xff;
+ arr[1] = (a >> 16) & 0xff;
+ arr[2] = (a >> 8) & 0xff;
+ arr[3] = (a >> 0) & 0xff;
+ arr[4] = (b >> 24) & 0xff;
+ arr[5] = (b >> 16) & 0xff;
+ arr[6] = (b >> 8) & 0xff;
+ arr[7] = (b >> 0) & 0xff;
+ arr[8] = (c >> 24) & 0xff;
+ arr[9] = (c >> 16) & 0xff;
+ arr[10] = (c >> 8) & 0xff;
+ arr[11] = (c >> 0) & 0xff;
+ arr[12] = (d >> 24) & 0xff;
+ arr[13] = (d >> 16) & 0xff;
+ arr[14] = (d >> 8) & 0xff;
+ arr[15] = (d >> 0) & 0xff;
+ return array_to_constant(mode, arr);
+}
+
/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
@@ -4857,6 +4929,201 @@ spu_expand_vector_init (rtx target, rtx vals)
}
}
+/* Return the insn index of the vector compare instruction for the given
+   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
+
+static int
+get_vec_cmp_insn (enum rtx_code code,
+ enum machine_mode dest_mode,
+ enum machine_mode op_mode)
+
+{
+ switch (code)
+ {
+ case EQ:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_ceq_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_ceq_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_ceq_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_ceq_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_ceq_v2df;
+ break;
+ case GT:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_cgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_cgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_cgt_v4si;
+ if (dest_mode == V4SImode && op_mode == V4SFmode)
+ return CODE_FOR_cgt_v4sf;
+ if (dest_mode == V2DImode && op_mode == V2DFmode)
+ return CODE_FOR_cgt_v2df;
+ break;
+ case GTU:
+ if (dest_mode == V16QImode && op_mode == V16QImode)
+ return CODE_FOR_clgt_v16qi;
+ if (dest_mode == V8HImode && op_mode == V8HImode)
+ return CODE_FOR_clgt_v8hi;
+ if (dest_mode == V4SImode && op_mode == V4SImode)
+ return CODE_FOR_clgt_v4si;
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* Emit a vector compare of operands OP0 and OP1 using code RCODE.
+   DMODE is the expected destination mode.  This function is recursive.  */
+
+static rtx
+spu_emit_vector_compare (enum rtx_code rcode,
+ rtx op0, rtx op1,
+ enum machine_mode dmode)
+{
+ int vec_cmp_insn;
+ rtx mask;
+ enum machine_mode dest_mode;
+ enum machine_mode op_mode = GET_MODE (op1);
+
+ gcc_assert (GET_MODE (op0) == GET_MODE (op1));
+
+  /* Single-precision vector compare instructions use a V4SImode
+     destination; double-precision ones use V2DImode.  The result is
+     moved to the requested destination mode afterwards.  */
+ if (dmode == V4SFmode)
+ dest_mode = V4SImode;
+ else if (dmode == V2DFmode)
+ dest_mode = V2DImode;
+ else
+ dest_mode = dmode;
+
+ mask = gen_reg_rtx (dest_mode);
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ if (vec_cmp_insn == -1)
+ {
+ bool swap_operands = false;
+ bool try_again = false;
+ switch (rcode)
+ {
+ case LT:
+ rcode = GT;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case LTU:
+ rcode = GTU;
+ swap_operands = true;
+ try_again = true;
+ break;
+ case NE:
+ /* Treat A != B as ~(A==B). */
+ {
+ enum insn_code nor_code;
+ rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+ nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (nor_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+ /* Try GT/GTU/LT/LTU OR EQ */
+ {
+ rtx c_rtx, eq_rtx;
+ enum insn_code ior_code;
+ enum rtx_code new_code;
+
+ switch (rcode)
+ {
+ case GE: new_code = GT; break;
+ case GEU: new_code = GTU; break;
+ case LE: new_code = LT; break;
+ case LEU: new_code = LTU; break;
+ default:
+ gcc_unreachable ();
+ }
+
+ c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
+ eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+
+ ior_code = ior_optab->handlers[(int)dest_mode].insn_code;
+ gcc_assert (ior_code != CODE_FOR_nothing);
+ emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* You only get two chances. */
+ if (try_again)
+ vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
+
+ gcc_assert (vec_cmp_insn != -1);
+
+ if (swap_operands)
+ {
+ rtx tmp;
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+ }
+
+ emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
+ if (dmode != dest_mode)
+ {
+ rtx temp = gen_reg_rtx (dest_mode);
+ convert_move (temp, mask, 0);
+ return temp;
+ }
+ return mask;
+}
+
+
+/* Emit a vector conditional expression.
+   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
+   CC_OP0 and CC_OP1 are the operands of the relational operation COND.  */
+
+int
+spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
+ rtx cond, rtx cc_op0, rtx cc_op1)
+{
+ enum machine_mode dest_mode = GET_MODE (dest);
+ enum rtx_code rcode = GET_CODE (cond);
+ rtx mask;
+
+  /* Get the vector mask for the given relational operation.  */
+ mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
+
+ emit_insn(gen_selb (dest, op2, op1, mask));
+
+ return 1;
+}
+
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
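Only ceq/cgt/clgt style patterns exist, so spu_emit_vector_compare derives
the remaining relations: LT/LTU swap operands onto GT/GTU, NE complements
EQ, and GE/GEU/LE/LEU are built as GT-or-EQ. A scalar C model of that
decomposition (a sketch, not the compiler's code; -1/0 stand in for the
all-ones/all-zeros element masks the vector patterns produce):

    static int eq (int a, int b) { return a == b ? -1 : 0; }
    static int gt (int a, int b) { return a >  b ? -1 : 0; }

    static int ne (int a, int b) { return ~eq (a, b); }            /* NOT (EQ)  */
    static int lt (int a, int b) { return gt (b, a); }             /* swap + GT */
    static int ge (int a, int b) { return gt (a, b) | eq (a, b); } /* GT IOR EQ */
    static int le (int a, int b) { return lt (a, b) | eq (a, b); }

spu_emit_vector_cond_expr then needs only a single selb, computing
dest = (mask & op1) | (~mask & op2).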
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index b8af6b2..8de32f8 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -32,6 +32,23 @@
extern int target_flags;
extern const char *spu_fixed_range_string;
+/* Which processor to generate code or schedule for. */
+enum processor_type
+{
+ PROCESSOR_CELL,
+ PROCESSOR_CELLEDP
+};
+
+extern GTY(()) int spu_arch;
+extern GTY(()) int spu_tune;
+
+/* Support for a compile-time default architecture and tuning. The rules are:
+ --with-arch is ignored if -march is specified.
+ --with-tune is ignored if -mtune is specified. */
+#define OPTION_DEFAULT_SPECS \
+ {"arch", "%{!march=*:-march=%(VALUE)}" }, \
+ {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }
+
/* Default target_flags if no switches specified. */
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS)
@@ -605,7 +622,18 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
#define NO_IMPLICIT_EXTERN_C 1
#define HANDLE_PRAGMA_PACK_PUSH_POP 1
-
+
+/* Canonicalize a comparison from one we don't have to one we do have. */
+#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
+ do { \
+ if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \
+ { \
+ rtx tem = (OP0); \
+ (OP0) = (OP1); \
+ (OP1) = tem; \
+ (CODE) = swap_condition (CODE); \
+ } \
+ } while (0)
/* These are set by the cmp patterns and used while expanding
conditional branches. */
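For example, a comparison the hardware lacks, such as a < b, reaches the back
end rewritten as b > a: CANONICALIZE_COMPARISON exchanges OP0 and OP1 while
swap_condition turns LT into GT (and LE, LTU, LEU likewise), so only the
ceq/cgt/clgt patterns are ever needed.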
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index b4c59cb..0b339c6 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -29,6 +29,7 @@
(define_attr "length" ""
(const_int 4))
+(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in spu.h.
@@ -59,9 +60,17 @@
;; for 6 cycles and the rest of the operation pipelines for
;; 7 cycles. The simplest way to model this is to simply ignore
;; the 6 cycle stall.
-(define_insn_reservation "FPD" 7 (eq_attr "type" "fpd")
+(define_insn_reservation "FPD" 7
+ (and (eq_attr "tune" "cell")
+ (eq_attr "type" "fpd"))
"pipe0 + pipe1, fp, nothing*5")
+;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined
+(define_insn_reservation "FPD_CELLEDP" 9
+ (and (eq_attr "tune" "celledp")
+ (eq_attr "type" "fpd"))
+ "pipe0 + fp, nothing*8")
+
(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
"pipe1")
@@ -144,6 +153,7 @@
(UNSPEC_WRCH 48)
(UNSPEC_SPU_REALIGN_LOAD 49)
(UNSPEC_SPU_MASK_FOR_LOAD 50)
+ (UNSPEC_DFTSV 51)
])
(include "predicates.md")
@@ -192,6 +202,16 @@
(define_mode_macro VSF [SF V4SF])
(define_mode_macro VDF [DF V2DF])
+(define_mode_macro VCMP [V16QI
+ V8HI
+ V4SI
+ V4SF
+ V2DF])
+
+(define_mode_macro VCMPU [V16QI
+ V8HI
+ V4SI])
+
(define_mode_attr bh [(QI "b") (V16QI "b")
(HI "h") (V8HI "h")
(SI "") (V4SI "")])
@@ -200,9 +220,14 @@
(DF "d") (V2DF "d")])
(define_mode_attr d6 [(SF "6") (V4SF "6")
(DF "d") (V2DF "d")])
-(define_mode_attr f2i [(SF "SI") (V4SF "V4SI")
+
+(define_mode_attr f2i [(SF "si") (V4SF "v4si")
+ (DF "di") (V2DF "v2di")])
+(define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
(DF "DI") (V2DF "V2DI")])
+(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
+
(define_mode_attr umask [(HI "f") (V8HI "f")
(SI "g") (V4SI "g")])
(define_mode_attr nmask [(HI "F") (V8HI "F")
@@ -990,8 +1015,8 @@
(neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, -0x80000000ull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
(define_expand "neg<mode>2"
[(parallel
@@ -999,22 +1024,22 @@
(neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, -0x8000000000000000ull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
(define_insn_and_split "_neg<mode>2"
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
(neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
- (use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
""
"#"
""
- [(set (match_dup:<f2i> 3)
- (xor:<f2i> (match_dup:<f2i> 4)
- (match_dup:<f2i> 2)))]
+ [(set (match_dup:<F2I> 3)
+ (xor:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
{
- operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
- operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
})
@@ -1026,8 +1051,8 @@
(abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
(define_expand "abs<mode>2"
[(parallel
@@ -1035,22 +1060,22 @@
(abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
(use (match_dup 2))])]
""
- "operands[2] = gen_reg_rtx (<f2i>mode);
- emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffffffffffull));")
+ "operands[2] = gen_reg_rtx (<F2I>mode);
+ emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
(define_insn_and_split "_abs<mode>2"
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
(abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
- (use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
+ (use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
""
"#"
""
- [(set (match_dup:<f2i> 3)
- (and:<f2i> (match_dup:<f2i> 4)
- (match_dup:<f2i> 2)))]
+ [(set (match_dup:<F2I> 3)
+ (and:<F2I> (match_dup:<F2I> 4)
+ (match_dup:<F2I> 2)))]
{
- operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
- operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
+ operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
+ operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
})
@@ -2493,27 +2518,173 @@
(set_attr "length" "12")])
(define_insn "ceq_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (eq:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
(match_operand:VSF 2 "spu_reg_operand" "r")))]
""
"fceq\t%0,%1,%2")
(define_insn "cmeq_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (eq:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
""
"fcmeq\t%0,%1,%2")
-(define_insn "ceq_vec"
+;; These implementations of ceq_df and cgt_df do not correctly handle
+;; NAN or INF. We will also get incorrect results when the result
+;; of the double subtract is too small.
+(define_expand "ceq_df"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
- (eq:SI (match_operand 1 "spu_reg_operand" "r")
- (match_operand 2 "spu_reg_operand" "r")))]
- "VECTOR_MODE_P(GET_MODE(operands[1]))
- && GET_MODE(operands[1]) == GET_MODE(operands[2])"
- "ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
- [(set_attr "length" "12")])
+ (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ {
+ rtx s0_ti = gen_reg_rtx(TImode);
+ rtx s1_v4 = gen_reg_rtx(V4SImode);
+ rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
+ rtx to_ti = gen_reg_rtx(TImode);
+ rtx to_v4 = gen_reg_rtx(V4SImode);
+ rtx l_v4 = gen_reg_rtx(V4SImode);
+ emit_insn (gen_spu_convert (l_v4, operands[1]));
+ emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
+ emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
+ emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
+ emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
+ emit_insn (gen_spu_convert (to_v4, to_ti));
+ emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
+ emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
+ emit_insn (gen_spu_convert (operands[0], to_v4));
+ DONE;
+ }
+})
+
+(define_insn "ceq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfceq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmeq_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmeq\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "ceq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx iszero = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_ceq_v4si (biteq, ra, rb));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
+
+(define_expand "cmeq_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+
+ emit_move_insn (sign_mask, pat);
+
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
+})
;; cgt
@@ -2584,19 +2755,215 @@ selb\t%0,%5,%0,%3"
(set_attr "length" "36")])
(define_insn "cgt_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (gt:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
(match_operand:VSF 2 "spu_reg_operand" "r")))]
""
"fcgt\t%0,%1,%2")
(define_insn "cmgt_<mode>"
- [(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
- (gt:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+ [(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
+ (gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
""
"fcmgt\t%0,%1,%2")
+(define_expand "cgt_df"
+ [(set (match_operand:SI 0 "spu_reg_operand" "=r")
+ (gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
+ (match_operand:DF 2 "const_zero_operand" "i")))]
+ ""
+{
+ if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ {
+ rtx s0_ti = gen_reg_rtx(TImode);
+ rtx s1_v4 = gen_reg_rtx(V4SImode);
+ rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
+ rtx to_ti = gen_reg_rtx(TImode);
+ rtx to_v4 = gen_reg_rtx(V4SImode);
+ rtx l_v4 = gen_reg_rtx(V4SImode);
+ emit_insn (gen_spu_convert(l_v4, operands[1]));
+ emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
+ emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
+ emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
+ emit_insn (gen_spu_convert(to_v4, to_ti));
+ emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
+ emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
+ emit_insn (gen_spu_convert(operands[0], to_v4));
+ DONE;
+ }
+})
+
+(define_insn "cgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
+ (match_operand:VDF 2 "spu_reg_operand" "r")))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_insn "cmgt_<mode>_celledp"
+ [(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
+ (gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
+ (abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dfcmgt\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "cgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:V2DF 2 "spu_reg_operand" "r")))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx zero = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx asel = gen_reg_rtx (V4SImode);
+ rtx bsel = gen_reg_rtx (V4SImode);
+ rtx abor = gen_reg_rtx (V4SImode);
+ rtx bbor = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx borrow_shuffle = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+ pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
+ 0x0C0D0E0F, 0xC0C0C0C0);
+ emit_move_insn (borrow_shuffle, pat);
+
+ emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+ emit_move_insn (zero, CONST0_RTX (V4SImode));
+ emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (asel, asel, asel, hi_promote));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_bg_v4si (abor, zero, a_abs));
+ emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
+ emit_insn (gen_selb (abor, a_abs, abor, asel));
+ emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_bg_v4si (bbor, zero, b_abs));
+ emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
+ emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
+ emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
+ emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
+ emit_insn (gen_ceq_v4si (temp2, abor, bbor));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
+(define_expand "cmgt_v2df"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
+ (abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx rb = spu_gen_subreg (V4SImode, operands[2]);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+
+ emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
+ emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
+ DONE;
+ }
+})
+
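The cgt_v2df/cmgt_v2df expansions above implement an IEEE-754 ordered
greater-than with no double-precision compare hardware: NaNs are detected
from the raw exponent/mantissa bits, negative values are mapped from
sign-magnitude to two's complement (the selb of x versus 0 - x) so the
64-bit lanes can be compared as integers, and a NaN in either operand
forces the result to false. A scalar C model of the same idea (an
illustrative sketch, not the emitted code):

    #include <stdint.h>
    #include <string.h>

    static int
    df_gt (double a, double b)
    {
      uint64_t ia, ib;
      memcpy (&ia, &a, 8);
      memcpy (&ib, &b, 8);
      /* NaN: magnitude above the all-ones-exponent, zero-mantissa bound.  */
      if ((ia & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL
          || (ib & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL)
        return 0;                            /* unordered => not greater */
      /* Sign-magnitude -> two's complement; plain signed comparison then
         gives the IEEE order.  Note +0.0 and -0.0 both map to 0.  */
      int64_t sa = (ia >> 63) ? -(int64_t) (ia & 0x7fffffffffffffffULL)
                              : (int64_t) ia;
      int64_t sb = (ib >> 63) ? -(int64_t) (ib & 0x7fffffffffffffffULL)
                              : (int64_t) ib;
      return sa > sb;
    }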
;; clgt
@@ -2656,6 +3023,150 @@ selb\t%0,%4,%0,%3"
(set_attr "length" "32")])
+;; dftsv
+(define_insn "dftsv_celledp"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
+ "spu_arch == PROCESSOR_CELLEDP"
+ "dftsv\t%0,%1,%2"
+ [(set_attr "type" "fpd")])
+
+(define_expand "dftsv"
+ [(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
+ (unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
+ ""
+{
+ if(spu_arch == PROCESSOR_CELL)
+ {
+ rtx result = gen_reg_rtx (V4SImode);
+ emit_move_insn (result, CONST0_RTX (V4SImode));
+
+ if (INTVAL (operands[2]))
+ {
+ rtx ra = spu_gen_subreg (V4SImode, operands[1]);
+ rtx abs = gen_reg_rtx (V4SImode);
+ rtx sign = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+
+ emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (sign, sign, sign, hi_promote));
+ emit_insn (gen_andv4si3 (abs, ra, sign_mask));
+
+ /* NaN or +inf or -inf */
+ if (INTVAL (operands[2]) & 0x70)
+ {
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx isinf = gen_reg_rtx (V4SImode);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
+
+ /* NaN */
+ if (INTVAL (operands[2]) & 0x40)
+ {
+ rtx isnan = gen_reg_rtx (V4SImode);
+ emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
+ emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
+ emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
+ emit_insn (gen_iorv4si3 (result, result, isnan));
+ }
+ /* +inf or -inf */
+ if (INTVAL (operands[2]) & 0x30)
+ {
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
+ emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
+
+ /* +inf */
+ if (INTVAL (operands[2]) & 0x20)
+ {
+ emit_insn (gen_andc_v4si (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -inf */
+ if (INTVAL (operands[2]) & 0x10)
+ {
+ emit_insn (gen_andv4si3 (temp2, isinf, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+
+ /* 0 or denorm */
+ if (INTVAL (operands[2]) & 0xF)
+ {
+ rtx iszero = gen_reg_rtx (V4SImode);
+ emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+
+ /* denorm */
+ if (INTVAL (operands[2]) & 0x3)
+ {
+ rtx isdenorm = gen_reg_rtx (V4SImode);
+ rtx denorm_mask = gen_reg_rtx (V4SImode);
+ emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
+ emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
+ emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
+ emit_insn (gen_shufb (isdenorm, isdenorm,
+ isdenorm, hi_promote));
+ /* +denorm */
+ if (INTVAL (operands[2]) & 0x2)
+ {
+ emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -denorm */
+ if (INTVAL (operands[2]) & 0x1)
+ {
+ emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+
+ /* 0 */
+ if (INTVAL (operands[2]) & 0xC)
+ {
+ emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
+ /* +0 */
+ if (INTVAL (operands[2]) & 0x8)
+ {
+ emit_insn (gen_andc_v4si (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ /* -0 */
+ if (INTVAL (operands[2]) & 0x4)
+ {
+ emit_insn (gen_andv4si3 (temp2, iszero, sign));
+ emit_insn (gen_iorv4si3 (result, result, temp2));
+ }
+ }
+ }
+ }
+ emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
+ DONE;
+ }
+})
+
+
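The Cell fallback above classifies each lane purely from the bit pattern:
a magnitude above the all-ones exponent is NaN, equal to it is an infinity,
zero is a signed zero, and a nonzero magnitude below the smallest normal is
a denormal; the sign then splits each class into its positive and negative
flag. A scalar C model (a sketch using the SPU_SV_* values this patch adds
to spu_intrinsics.h):

    #include <stdint.h>
    #include <string.h>

    static int
    df_classify (double x)          /* returns a mask of SPU_SV_* bits */
    {
      uint64_t i;
      memcpy (&i, &x, 8);
      int neg = (int) (i >> 63);
      uint64_t mag = i & 0x7fffffffffffffffULL;

      if (mag > 0x7ff0000000000000ULL)
        return 0x40;                      /* SPU_SV_NAN */
      if (mag == 0x7ff0000000000000ULL)
        return neg ? 0x10 : 0x20;         /* SPU_SV_{NEG,POS}_INFINITY */
      if (mag == 0)
        return neg ? 0x04 : 0x08;         /* SPU_SV_{NEG,POS}_ZERO */
      if (mag < 0x0010000000000000ULL)
        return neg ? 0x01 : 0x02;         /* SPU_SV_{NEG,POS}_DENORM */
      return 0;                           /* ordinary normal number */
    }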
;; branches
(define_insn ""
@@ -2747,6 +3258,53 @@ selb\t%0,%4,%0,%3"
DONE;
})
+(define_expand "cmpdf"
+ [(set (cc0)
+ (compare (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" "")))]
+ "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ || spu_arch == PROCESSOR_CELLEDP "
+ "{
+ spu_compare_op0 = operands[0];
+ spu_compare_op1 = operands[1];
+ DONE;
+}")
+
+;; vector conditional compare patterns
+(define_expand "vcond<mode>"
+ [(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMP
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMP 4 "spu_reg_operand" "r")
+ (match_operand:VCMP 5 "spu_reg_operand" "r")])
+ (match_operand:VCMP 1 "spu_reg_operand" "r")
+ (match_operand:VCMP 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
+ (if_then_else:VCMPU
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VCMPU 4 "spu_reg_operand" "r")
+ (match_operand:VCMPU 5 "spu_reg_operand" "r")])
+ (match_operand:VCMPU 1 "spu_reg_operand" "r")
+ (match_operand:VCMPU 2 "spu_reg_operand" "r")))]
+ ""
+ {
+ if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
+ operands[3], operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+ })
+
;; branch on condition
@@ -3376,7 +3934,7 @@ selb\t%0,%4,%0,%3"
(define_expand "sminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=r")
- (smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
+ (smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
(match_operand:V4SF 2 "register_operand" "r")))]
""
"
@@ -3388,6 +3946,34 @@ selb\t%0,%4,%0,%3"
DONE;
}")
+(define_expand "smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[2], operands[1],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
+(define_expand "sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=r")
+ (smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
+ (match_operand:V2DF 2 "register_operand" "r")))]
+ ""
+ "
+{
+ rtx mask = gen_reg_rtx (V2DImode);
+ emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
+ emit_insn (gen_selb (operands[0], operands[1], operands[2],
+ spu_gen_subreg (V4SImode, mask)));
+ DONE;
+}")
+
(define_expand "vec_widen_umult_hi_v8hi"
[(set (match_operand:V4SI 0 "register_operand" "=r")
(mult:V4SI
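The smaxv2df3/sminv2df3 expanders above reduce max/min to cgt_v2df plus
selb: the compare mask selects, per 64-bit lane, which input survives. In
intrinsic form (a sketch; spu_sel takes its second argument where the mask
bits are set):

    vector double
    vmax (vector double a, vector double b)
    {
      return spu_sel (b, a, spu_cmpgt (a, b));   /* a where a > b, else b */
    }

    vector double
    vmin (vector double a, vector double b)
    {
      return spu_sel (a, b, spu_cmpgt (a, b));   /* b where a > b, else a */
    }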
diff --git a/gcc/config/spu/spu.opt b/gcc/config/spu/spu.opt
index 6c3ab59..e8c11d1 100644
--- a/gcc/config/spu/spu.opt
+++ b/gcc/config/spu/spu.opt
@@ -55,3 +55,11 @@ Generate code for 32 bit addressing
mfixed-range=
Target RejectNegative Joined Var(spu_fixed_range_string)
Specify range of registers to make fixed
+
+march=
+Target RejectNegative Joined Var(spu_arch_string)
+Generate code for given CPU
+
+mtune=
+Target RejectNegative Joined Var(spu_tune_string)
+Schedule code for given CPU
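With these options, code generation and scheduling can be chosen
independently, e.g. spu-gcc -march=celledp -mtune=celledp (the spu-gcc
driver name is assumed here). -march=celledp enables the dfceq/dfcgt/dftsv
instructions and defines __SPU_EDP__, while -mtune only switches the "tune"
insn attribute used by the scheduler.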
diff --git a/gcc/config/spu/spu_internals.h b/gcc/config/spu/spu_internals.h
index ecc8dc5..fb42c87 100644
--- a/gcc/config/spu/spu_internals.h
+++ b/gcc/config/spu/spu_internals.h
@@ -233,6 +233,15 @@
#define si_rchcnt(imm) __builtin_si_rchcnt(imm)
#define si_wrch(imm,ra) __builtin_si_wrch(imm,ra)
+/* celledp-only instructions */
+#ifdef __SPU_EDP__
+#define si_dfceq(ra,rb) __builtin_si_dfceq(ra,rb)
+#define si_dfcmeq(ra,rb) __builtin_si_dfcmeq(ra,rb)
+#define si_dfcgt(ra,rb) __builtin_si_dfcgt(ra,rb)
+#define si_dfcmgt(ra,rb) __builtin_si_dfcmgt(ra,rb)
+#define si_dftsv(ra,imm) __builtin_si_dftsv(ra,imm)
+#endif /* __SPU_EDP__ */
+
#define si_from_char(scalar) __builtin_si_from_char(scalar)
#define si_from_uchar(scalar) __builtin_si_from_uchar(scalar)
#define si_from_short(scalar) __builtin_si_from_short(scalar)
@@ -295,6 +304,7 @@
#define spu_cmpabsgt(ra,rb) __builtin_spu_cmpabsgt(ra,rb)
#define spu_cmpeq(ra,rb) __builtin_spu_cmpeq(ra,rb)
#define spu_cmpgt(ra,rb) __builtin_spu_cmpgt(ra,rb)
+#define spu_testsv(ra,imm) __builtin_spu_testsv(ra,imm)
#define spu_hcmpeq(ra,rb) __builtin_spu_hcmpeq(ra,rb)
#define spu_hcmpgt(ra,rb) __builtin_spu_hcmpgt(ra,rb)
#define spu_cntb(ra) __builtin_spu_cntb(ra)
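The si_* forms operate on the generic qword type and are visible only when
__SPU_EDP__ is defined; for example (ra assumed to hold two doubles as a
qword):

    #ifdef __SPU_EDP__
    qword nan_lanes = si_dftsv (ra, 0x40);   /* 0x40 == SPU_SV_NAN */
    #endif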
diff --git a/gcc/config/spu/spu_intrinsics.h b/gcc/config/spu/spu_intrinsics.h
index ca91927..faaf8a6 100644
--- a/gcc/config/spu/spu_intrinsics.h
+++ b/gcc/config/spu/spu_intrinsics.h
@@ -70,6 +70,16 @@
#define MFC_WrListStallAck 26
#define MFC_RdAtomicStat 27
+/* Bit flag mnemonics for testing special values
+   (dftsv / spu_testsv).  */
+#define SPU_SV_NEG_DENORM 0x01 /* negative denormalized number */
+#define SPU_SV_POS_DENORM 0x02 /* positive denormalized number */
+#define SPU_SV_NEG_ZERO 0x04 /* negative zero */
+#define SPU_SV_POS_ZERO 0x08 /* positive zero */
+#define SPU_SV_NEG_INFINITY 0x10 /* negative infinity */
+#define SPU_SV_POS_INFINITY 0x20 /* positive infinity */
+#define SPU_SV_NAN 0x40 /* not a number */
+
#include <spu_internals.h>
#endif /* _SPU_INTRINSICS_H */
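OR-ing these mnemonics builds the immediate operand for spu_testsv/si_dftsv.
For instance, flagging any non-finite lane (a usage sketch; vd assumed
vector double, result type per the builtin table vector unsigned long long):

    vector unsigned long long bad =
      spu_testsv (vd, SPU_SV_NAN | SPU_SV_POS_INFINITY | SPU_SV_NEG_INFINITY);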