aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/i386/i386-protos.h2
-rw-r--r--gcc/config/i386/i386.c140
-rw-r--r--gcc/config/i386/i386.md114
-rw-r--r--gcc/config/i386/sse.md20
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/math-torture/ceil.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/math-torture/floor.c15
8 files changed, 295 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 64d7f6e..8d776d5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,21 @@
2006-10-29 Richard Guenther <rguenther@suse.de>
+ * config/i386/i386-protos.h (ix86_expand_floorceil): Declare.
+ (ix86_expand_floorceildf_32): Likewise.
+ * config/i386/i386.c (ix86_expand_sse_compare_mask): New
+ static helper function.
+ (ix86_expand_floorceil): Expander for floor and ceil to SSE
+ math.
+ (ix86_expand_floorceildf_32): Same for DFmode on 32bit archs.
+ * config/i386/i386.md (floordf2): Adjust to enable floor
+ expansion via ix86_expand_floorceil if TARGET_SSE_MATH and
+ -fno-trapping-math is enabled and if not optimizing for size.
+ (floorsf2, ceildf2, ceilsf2): Likewise.
+ * config/i386/sse.md (sse_maskcmpsf3): New insn.
+ (sse2_maskcmpdf3): Likewise.
+
+2006-10-29 Richard Guenther <rguenther@suse.de>
+
* builtins.c (expand_builtin_mathfn): Expand nearbyint as
rint in case -fno-trapping-math is enabled.
* config/i386/i386-protos.h (ix86_expand_rint): Declare.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4cb110c..a6f760c 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -160,6 +160,8 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
extern void ix86_expand_lround (rtx, rtx);
extern void ix86_expand_lfloorceil (rtx, rtx, bool);
extern void ix86_expand_rint (rtx, rtx);
+extern void ix86_expand_floorceil (rtx, rtx, bool);
+extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a124536..6a125da 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19310,6 +19310,33 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
return label;
}
+/* Emit a scalar SSE compare that yields a mask and return the register
+   holding that mask.  The comparison applies CODE to OP0 and OP1; when
+   SWAP_OPERANDS is true the two operands trade places first.  The mask
+   register has the mode of the OP0 that was passed in; DFmode selects the
+   SSE2 pattern and every other mode the SFmode pattern.  */
+static rtx
+ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
+                              bool swap_operands)
+{
+  /* The mode is read before any swap, i.e. from the caller's OP0.  */
+  enum machine_mode mode = GET_MODE (op0);
+  rtx result = gen_reg_rtx (mode);
+  rtx lhs = swap_operands ? op1 : op0;
+  rtx rhs = swap_operands ? op0 : op1;
+  rtx cmp = gen_rtx_fmt_ee (code, mode, lhs, rhs);
+
+  emit_insn (mode == DFmode
+             ? gen_sse2_maskcmpdf3 (result, lhs, rhs, cmp)
+             : gen_sse_maskcmpsf3 (result, lhs, rhs, cmp));
+
+  return result;
+}
+
+
/* Generate and return a rtx of mode MODE for 2**n where n is the number
of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
static rtx
@@ -19423,4 +19450,117 @@ ix86_expand_rint (rtx operand0, rtx operand1)
emit_move_insn (operand0, res);
}
+/* Expand an SSE2 sequence computing floor (DO_FLOOR true) or ceil
+   (DO_FLOOR false) of OPERAND1, storing the result into OPERAND0.  The
+   rounding is done by adding and subtracting TWO52, so no integer
+   conversion is needed; the DFmode expanders pick this variant when
+   !TARGET_64BIT.  */
+void
+ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
+{
+  /* C code for the stuff we expand below.
+        double xa = fabs (x), x2;
+        if (!isless (xa, TWO52))
+          return x;
+        xa = xa + TWO52 - TWO52;
+        x2 = copysign (xa, x);
+     Compensate.  Floor:
+        if (x2 > x)
+          x2 -= 1;
+     Compensate.  Ceil:
+        if (x2 < x)
+          x2 -= -1;
+        return x2;
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx xa, TWO52, tmp, label, one, res, mask;
+
+  TWO52 = ix86_gen_TWO52 (mode);
+
+  /* Temporary for holding the result, initialized to the input
+     operand to ease control flow.  */
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1); MASK is filled in by the helper and reused for
+     the copysign below.  */
+  xa = ix86_expand_sse_fabs (res, &mask);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  /* xa = xa + TWO52 - TWO52;
+     expand_simple_binop treats its target only as a suggestion, so the
+     returned rtx must be used rather than assuming the target was
+     written.  */
+  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+  /* xa = copysign (xa, operand1) */
+  ix86_sse_copysign_to_positive (xa, xa, res, mask);
+
+  /* generate 1.0 for floor or -1.0 for ceil */
+  one = force_reg (mode,
+                   const_double_from_real_value (do_floor
+                                                 ? dconst1 : dconstm1, mode));
+
+  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) for floor,
+     xa = xa - (xa < operand1 ? -1 : 0) for ceil.  The compare yields an
+     all-ones or all-zeros mask, so the AND leaves either ONE or +0.0.  */
+  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+  emit_insn (gen_rtx_SET (VOIDmode, tmp,
+                          gen_rtx_AND (mode, one, tmp)));
+  /* Always subtract: -0.0 - +0.0 stays -0.0, whereas -0.0 + +0.0 would
+     give +0.0 and lose the sign of a negative-zero result.  */
+  tmp = expand_simple_binop (mode, MINUS, xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+  emit_move_insn (res, tmp);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
+
+/* Expand an SSE sequence computing floor (DO_FLOOR true) or ceil
+   (DO_FLOOR false) of OPERAND1, storing the result into OPERAND0.  The
+   value is truncated through an integer register (DImode for DFmode,
+   SImode otherwise) and then adjusted by one where truncation moved it
+   the wrong way.  Used by the SFmode expanders and by the DFmode ones
+   when TARGET_64BIT.  */
+void
+ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
+{
+  /* C code for the stuff we expand below (shown for double; TWO52 is
+     2**n with n the number of mantissa bits of the mode).
+        double xa = fabs (x), x2;
+        if (!isless (xa, TWO52))
+          return x;
+        x2 = (double)(long)x;
+     Compensate.  Floor:
+        if (x2 > x)
+          x2 -= 1;
+     Compensate.  Ceil:
+        if (x2 < x)
+          x2 += 1;
+        return x2;
+   */
+  enum machine_mode mode = GET_MODE (operand0);
+  rtx xa, xi, TWO52, tmp, label, one, res;
+
+  TWO52 = ix86_gen_TWO52 (mode);
+
+  /* Temporary for holding the result, initialized to the input
+     operand to ease control flow.  */
+  res = gen_reg_rtx (mode);
+  emit_move_insn (res, operand1);
+
+  /* xa = abs (operand1) */
+  xa = ix86_expand_sse_fabs (res, NULL);
+
+  /* if (!isless (xa, TWO52)) goto label; */
+  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+  /* xa = (double)(long)x */
+  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+  expand_fix (xi, res, 0);
+  expand_float (xa, xi, 0);
+
+  /* generate 1.0 */
+  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) for floor,
+     xa = xa + (xa < operand1 ? 1 : 0) for ceil.  The compare yields an
+     all-ones or all-zeros mask, so the AND leaves either 1.0 or +0.0.  */
+  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+  emit_insn (gen_rtx_SET (VOIDmode, tmp,
+                          gen_rtx_AND (mode, one, tmp)));
+  /* expand_simple_binop treats its target only as a suggestion, so take
+     the returned rtx and copy it into RES explicitly.  */
+  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
+                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+  emit_move_insn (res, tmp);
+
+  /* NOTE(review): the integer round trip produces +0.0 for inputs in
+     (-1, -0.0], so a result that should be -0.0 comes out as +0.0 --
+     confirm whether signed zeros must be honored on this path.  */
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operand0, res);
+}
+
+
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5aaf43b..99ab0db 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17435,34 +17435,59 @@
(define_expand "floordf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)"
{
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)
+ {
+ if (TARGET_64BIT)
+ ix86_expand_floorceil (operand0, operand1, true);
+ else
+ ix86_expand_floorceildf_32 (operand0, operand1, true);
+ }
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_floor (op0, op1));
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ }
DONE;
})
(define_expand "floorsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
+ "(TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)"
{
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
+ if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)
+ ix86_expand_floorceil (operand0, operand1, true);
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_floor (op0, op1));
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ }
DONE;
})
@@ -17696,34 +17721,59 @@
(define_expand "ceildf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)"
{
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)
+ {
+ if (TARGET_64BIT)
+ ix86_expand_floorceil (operand0, operand1, false);
+ else
+ ix86_expand_floorceildf_32 (operand0, operand1, false);
+ }
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_ceil (op0, op1));
+ emit_insn (gen_extenddfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+ }
DONE;
})
(define_expand "ceilsf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SF 1 "register_operand" ""))]
- "TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations"
+ "(TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)"
{
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
+ if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !optimize_size)
+ ix86_expand_floorceil (operand0, operand1, false);
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_ceil (op0, op1));
+ emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ }
DONE;
})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8f5786a..78976ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -733,6 +733,16 @@
[(set_attr "type" "ssecmp")
(set_attr "mode" "V4SF")])
+;; Scalar SFmode compare: operand 0 is set to the value of comparison
+;; operator 3 applied to operands 1 and 2.  Operand 1 is tied to the output
+;; register by the "0" constraint, which is why the template names only
+;; operands 0 and 2.  Available when TARGET_SSE.
+;; NOTE(review): %D3 presumably prints the condition suffix of the cmpss
+;; mnemonic -- confirm against the operand printer.
+(define_insn "sse_maskcmpsf3"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (match_operator:SF 3 "sse_comparison_operator"
+ [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE"
+ "cmp%D3ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "SF")])
+
(define_insn "sse_vmmaskcmpv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
@@ -1718,6 +1728,16 @@
[(set_attr "type" "ssecmp")
(set_attr "mode" "V2DF")])
+;; Scalar DFmode compare: operand 0 is set to the value of comparison
+;; operator 3 applied to operands 1 and 2.  Operand 1 is tied to the output
+;; register by the "0" constraint, which is why the template names only
+;; operands 0 and 2.  Available when TARGET_SSE2.
+;; NOTE(review): %D3 presumably prints the condition suffix of the cmpsd
+;; mnemonic -- confirm against the operand printer.
+(define_insn "sse2_maskcmpdf3"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (match_operator:DF 3 "sse_comparison_operator"
+ [(match_operand:DF 1 "register_operand" "0")
+ (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE2"
+ "cmp%D3sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "DF")])
+
(define_insn "sse2_vmmaskcmpv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2fad67c..55d7f95 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
2006-10-29 Richard Guenther <rguenther@suse.de>
+ * gcc.target/i386/math-torture/ceil.c: New testcase.
+ * gcc.target/i386/math-torture/floor.c: Likewise.
+
+2006-10-29 Richard Guenther <rguenther@suse.de>
+
* gcc.target/i386/math-torture/rint.c: New testcase.
* gcc.target/i386/math-torture/nearbyint.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/ceil.c b/gcc/testsuite/gcc.target/i386/math-torture/ceil.c
new file mode 100644
index 0000000..dfccd7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/ceil.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+/* Each function below is a bare call to one ceil builtin, one per
+   floating-point type, so the file covers the float, double and
+   long double variants.  */
+
+/* float variant: __builtin_ceilf.  */
+float testlf (float x)
+{
+ return __builtin_ceilf (x);
+}
+/* double variant: __builtin_ceil.  */
+double testl (double x)
+{
+ return __builtin_ceil (x);
+}
+/* long double variant: __builtin_ceill.  */
+long double testll (long double x)
+{
+ return __builtin_ceill (x);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/floor.c b/gcc/testsuite/gcc.target/i386/math-torture/floor.c
new file mode 100644
index 0000000..0c3aa91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/math-torture/floor.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble } */
+
+/* Each function below is a bare call to one floor builtin, one per
+   floating-point type, so the file covers the float, double and
+   long double variants.  */
+
+/* float variant: __builtin_floorf.  */
+float testlf (float x)
+{
+ return __builtin_floorf (x);
+}
+/* double variant: __builtin_floor.  */
+double testl (double x)
+{
+ return __builtin_floor (x);
+}
+/* long double variant: __builtin_floorl.  */
+long double testll (long double x)
+{
+ return __builtin_floorl (x);
+}
+