about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
authorBen Elliston <bje@gcc.gnu.org>2007-11-15 23:22:04 +1100
committerBen Elliston <bje@gcc.gnu.org>2007-11-15 23:22:04 +1100
commitdd4ba939839ddf0b34b6ea847c55f6b7798aefb9 (patch)
tree0f57310f8091bf926ee60593a87870c6561896f3 /gcc
parent4a648c5d7df866a1e1b25aa1d73b1b3651461f8a (diff)
downloadgcc-dd4ba939839ddf0b34b6ea847c55f6b7798aefb9.zip
gcc-dd4ba939839ddf0b34b6ea847c55f6b7798aefb9.tar.gz
gcc-dd4ba939839ddf0b34b6ea847c55f6b7798aefb9.tar.bz2
spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL condition from HONOR_NANS test.
* config/spu/spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL
condition from HONOR_NANS test.
* config/spu/spu.md (ceq_df): Always generate comparison code
inline, including checks for NaNs and infinities.
(cgt_df): Likewise.
(cgt_v2df): Likewise.
(cmpdf): Make this expander unconditional.

testsuite/
* gcc.target/spu/compare-dp.c: New test.

From-SVN: r130198
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/config/spu/spu.c24
-rw-r--r--gcc/config/spu/spu.md209
-rw-r--r--gcc/testsuite/ChangeLog4
-rw-r--r--gcc/testsuite/gcc.target/spu/compare-dp.c10
5 files changed, 190 insertions, 67 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c3aa6ec..23ba940 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2007-11-15 Sa Liu <saliu@de.ibm.com>
+
+ * config/spu/spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL
+ condition from HONOR_NANS test.
+ * config/spu/spu.md (ceq_df): Always generate comparison code
+ inline, including checks for NaNs and infinities.
+ (cgt_df): Likewise.
+ (cgt_v2df): Likewise.
+ (cmpdf): Make this expander unconditional.
+
2007-11-15 Richard Guenther <rguenther@suse.de>
* tree-ssa-alias.c (create_overlap_variables_for): Make sure
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index bf2e9e8..1f2efb6 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -765,7 +765,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
case GE:
scode = SPU_GT;
- if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP)
+ if (HONOR_NANS (op_mode))
{
reverse_compare = 0;
reverse_test = 0;
@@ -780,7 +780,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
break;
case LE:
scode = SPU_GT;
- if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP)
+ if (HONOR_NANS (op_mode))
{
reverse_compare = 1;
reverse_test = 0;
@@ -883,23 +883,9 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
abort ();
}
- if (GET_MODE (spu_compare_op1) == DFmode)
- {
- rtx reg = gen_reg_rtx (DFmode);
- if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
- || (scode != SPU_GT && scode != SPU_EQ))
- abort ();
- if (spu_arch == PROCESSOR_CELL)
- {
- if (reverse_compare)
- emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
- else
- emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
- reverse_compare = 0;
- spu_compare_op0 = reg;
- spu_compare_op1 = CONST0_RTX (DFmode);
- }
- }
+ if (GET_MODE (spu_compare_op1) == DFmode
+ && (scode != SPU_GT && scode != SPU_EQ))
+ abort ();
if (is_set == 0 && spu_compare_op1 == const0_rtx
&& (GET_MODE (spu_compare_op0) == SImode
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index ad2d78e..827e5e8 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -2534,34 +2534,72 @@
""
"fcmeq\t%0,%1,%2")
-;; These implementations of ceq_df and cgt_df do not correctly handle
-;; NAN or INF. We will also get incorrect results when the result
-;; of the double subtract is too small.
+;; These implementations will ignore checking of NaN or INF if
+;; compiled with option -ffinite-math-only.
(define_expand "ceq_df"
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
(eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
(match_operand:DF 2 "const_zero_operand" "i")))]
""
{
- if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
- {
- rtx s0_ti = gen_reg_rtx(TImode);
- rtx s1_v4 = gen_reg_rtx(V4SImode);
- rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
- rtx to_ti = gen_reg_rtx(TImode);
- rtx to_v4 = gen_reg_rtx(V4SImode);
- rtx l_v4 = gen_reg_rtx(V4SImode);
- emit_insn (gen_spu_convert (l_v4, operands[1]));
- emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
- emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
- emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
- emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
- emit_insn (gen_spu_convert (to_v4, to_ti));
- emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
- emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
- emit_insn (gen_spu_convert (operands[0], to_v4));
- DONE;
- }
+ if (spu_arch == PROCESSOR_CELL)
+ {
+ rtx ra = gen_reg_rtx (V4SImode);
+ rtx rb = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx biteq = gen_reg_rtx (V4SImode);
+ rtx ahi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx iszero = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hihi_promote = gen_reg_rtx (TImode);
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
+ 0x08090A0B, 0x18191A1B);
+ emit_move_insn (hihi_promote, pat);
+
+ emit_insn (gen_spu_convert (ra, operands[1]));
+ emit_insn (gen_spu_convert (rb, operands[2]));
+ emit_insn (gen_ceq_v4si (biteq, ra, rb));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
+
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ if (!flag_finite_math_only)
+ {
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ }
+ emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
+ emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
+ if (!flag_finite_math_only)
+ {
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ }
+ emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
+ DONE;
+ }
})
(define_insn "ceq_<mode>_celledp"
@@ -2777,24 +2815,102 @@ selb\t%0,%5,%0,%3"
(match_operand:DF 2 "const_zero_operand" "i")))]
""
{
- if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
+ if (spu_arch == PROCESSOR_CELL)
{
- rtx s0_ti = gen_reg_rtx(TImode);
- rtx s1_v4 = gen_reg_rtx(V4SImode);
- rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
- rtx to_ti = gen_reg_rtx(TImode);
- rtx to_v4 = gen_reg_rtx(V4SImode);
- rtx l_v4 = gen_reg_rtx(V4SImode);
- emit_insn (gen_spu_convert(l_v4, operands[1]));
- emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
- emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
- emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
- emit_insn (gen_spu_convert(to_v4, to_ti));
- emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
- emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
- emit_insn (gen_spu_convert(operands[0], to_v4));
+ rtx ra = gen_reg_rtx (V4SImode);
+ rtx rb = gen_reg_rtx (V4SImode);
+ rtx zero = gen_reg_rtx (V4SImode);
+ rtx temp = gen_reg_rtx (TImode);
+ rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
+ rtx temp2 = gen_reg_rtx (V4SImode);
+ rtx hi_inf = gen_reg_rtx (V4SImode);
+ rtx a_nan = gen_reg_rtx (V4SImode);
+ rtx b_nan = gen_reg_rtx (V4SImode);
+ rtx a_abs = gen_reg_rtx (V4SImode);
+ rtx b_abs = gen_reg_rtx (V4SImode);
+ rtx asel = gen_reg_rtx (V4SImode);
+ rtx bsel = gen_reg_rtx (V4SImode);
+ rtx abor = gen_reg_rtx (V4SImode);
+ rtx bbor = gen_reg_rtx (V4SImode);
+ rtx gt_hi = gen_reg_rtx (V4SImode);
+ rtx gt_lo = gen_reg_rtx (V4SImode);
+ rtx sign_mask = gen_reg_rtx (V4SImode);
+ rtx nan_mask = gen_reg_rtx (V4SImode);
+ rtx hi_promote = gen_reg_rtx (TImode);
+ rtx borrow_shuffle = gen_reg_rtx (TImode);
+
+ rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
+ 0x7FFFFFFF, 0xFFFFFFFF);
+ emit_move_insn (sign_mask, pat);
+ pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
+ 0x7FF00000, 0x0);
+ emit_move_insn (nan_mask, pat);
+ pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
+ 0x08090A0B, 0x08090A0B);
+ emit_move_insn (hi_promote, pat);
+ pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
+ 0x0C0D0E0F, 0xC0C0C0C0);
+ emit_move_insn (borrow_shuffle, pat);
+
+ emit_insn (gen_spu_convert (ra, operands[1]));
+ emit_insn (gen_spu_convert (rb, operands[2]));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+
+ if (!flag_finite_math_only)
+ {
+ /* check if ra is NaN */
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
+ emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
+
+ /* check if rb is NaN */
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
+ emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
+ emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
+
+ /* check if ra or rb is NaN */
+ emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
+ }
+ emit_move_insn (zero, CONST0_RTX (V4SImode));
+ emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (asel, asel, asel, hi_promote));
+ emit_insn (gen_bg_v4si (abor, zero, a_abs));
+ emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
+ emit_insn (gen_selb (abor, a_abs, abor, asel));
+
+ emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+ emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
+ emit_insn (gen_bg_v4si (bbor, zero, b_abs));
+ emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
+ emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
+ emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
+
+ emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
+ emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
+ emit_insn (gen_ceq_v4si (temp2, abor, bbor));
+ emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
+ GEN_INT (4 * 8)));
+ emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
+ emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
+ emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
+ if (!flag_finite_math_only)
+ {
+ /* correct for NaNs */
+ emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
+ }
+ emit_insn (gen_spu_convert (operands[0], temp2));
DONE;
- }
+ }
})
(define_insn "cgt_<mode>_celledp"
@@ -2855,17 +2971,17 @@ selb\t%0,%5,%0,%3"
0x0C0D0E0F, 0xC0C0C0C0);
emit_move_insn (borrow_shuffle, pat);
- emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
- emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
- emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
+ emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
- emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
- emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
- emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
+ emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
+ emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
+ emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
@@ -2875,14 +2991,12 @@ selb\t%0,%5,%0,%3"
emit_move_insn (zero, CONST0_RTX (V4SImode));
emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (asel, asel, asel, hi_promote));
- emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
emit_insn (gen_bg_v4si (abor, zero, a_abs));
emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
emit_insn (gen_selb (abor, a_abs, abor, asel));
emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
- emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
emit_insn (gen_bg_v4si (bbor, zero, b_abs));
emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
@@ -3267,8 +3381,7 @@ selb\t%0,%4,%0,%3"
[(set (cc0)
(compare (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "register_operand" "")))]
- "(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
- || spu_arch == PROCESSOR_CELLEDP "
+ ""
"{
spu_compare_op0 = operands[0];
spu_compare_op1 = operands[1];
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 66ac3de..d937f4f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2007-11-15 Ben Elliston <bje@au.ibm.com>
+
+ * gcc.target/spu/compare-dp.c: New test.
+
2007-11-14 Eric Botcazou <ebotcazou@libertysurf.fr>
* gcc.dg/pr33923.c: New test.
diff --git a/gcc/testsuite/gcc.target/spu/compare-dp.c b/gcc/testsuite/gcc.target/spu/compare-dp.c
new file mode 100644
index 0000000..cbc7663
--- /dev/null
+++ b/gcc/testsuite/gcc.target/spu/compare-dp.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler-not "__eqdf2" } } */
+
+/* Ensure double precision comparisons are always inlined. */
+
+int test (double a, double b) __attribute__((noinline));
+int test (double a, double b)
+{
+ return a == b;
+}