diff options
-rw-r--r-- | gcc/ChangeLog                                      |  16
-rw-r--r-- | gcc/config/i386/i386-protos.h                      |   2
-rw-r--r-- | gcc/config/i386/i386.c                             | 140
-rw-r--r-- | gcc/config/i386/i386.md                            | 114
-rw-r--r-- | gcc/config/i386/sse.md                             |  20
-rw-r--r-- | gcc/testsuite/ChangeLog                            |   5
-rw-r--r-- | gcc/testsuite/gcc.target/i386/math-torture/ceil.c  |  15
-rw-r--r-- | gcc/testsuite/gcc.target/i386/math-torture/floor.c |  15
8 files changed, 295 insertions, 32 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 64d7f6e..8d776d5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,21 @@ 2006-10-29 Richard Guenther <rguenther@suse.de> + * config/i386/i386-protos.h (ix86_expand_floorceil): Declare. + (ix86_expand_floorceildf_32): Likewise. + * config/i386/i386.c (ix86_expand_sse_compare_mask): New + static helper function. + (ix86_expand_floorceil): Expander for floor and ceil to SSE + math. + (ix86_expand_floorceildf_32): Same for DFmode on 32bit archs. + * config/i386/i386.md (floordf2): Adjust to enable floor + expansion via ix86_expand_floorceil if TARGET_SSE_MATH and + -fno-trapping-math is enabled and if not optimizing for size. + (floorsf2, ceildf2, ceilsf2): Likewise. + * config/i386/sse.md (sse_maskcmpsf3): New insn. + (sse2_maskcmpdf3): Likewise. + +2006-10-29 Richard Guenther <rguenther@suse.de> + * builtins.c (expand_builtin_mathfn): Expand nearbyint as rint in case -fno-trapping-math is enabled. * config/i386/i386-protos.h (ix86_expand_rint): Declare. 
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 4cb110c..a6f760c 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -160,6 +160,8 @@ extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode); extern void ix86_expand_lround (rtx, rtx); extern void ix86_expand_lfloorceil (rtx, rtx, bool); extern void ix86_expand_rint (rtx, rtx); +extern void ix86_expand_floorceil (rtx, rtx, bool); +extern void ix86_expand_floorceildf_32 (rtx, rtx, bool); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a124536..6a125da 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19310,6 +19310,33 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, return label; } +/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 + using comparison code CODE. Operands are swapped for the comparison if + SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ +static rtx +ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, + bool swap_operands) +{ + enum machine_mode mode = GET_MODE (op0); + rtx mask = gen_reg_rtx (mode); + + if (swap_operands) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } + + if (mode == DFmode) + emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1, + gen_rtx_fmt_ee (code, mode, op0, op1))); + else + emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1, + gen_rtx_fmt_ee (code, mode, op0, op1))); + + return mask; +} + /* Generate and return a rtx of mode MODE for 2**n where n is the number of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ static rtx @@ -19423,4 +19450,117 @@ ix86_expand_rint (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } +/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing + into OPERAND0. 
*/ +void +ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + xa = xa + TWO52 - TWO52; + x2 = copysign (xa, x); + Compensate. Floor: + if (x2 > x) + x2 -= 1; + Compensate. Ceil: + if (x2 < x) + x2 += 1; + return x2; + */ + enum machine_mode mode = GET_MODE (operand0); + rtx xa, TWO52, tmp, label, one, res, mask; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, &mask); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa = xa + TWO52 - TWO52; */ + expand_simple_binop (mode, PLUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + + /* xa = copysign (xa, operand1) */ + ix86_sse_copysign_to_positive (xa, xa, res, mask); + + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); + + /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_AND (mode, one, tmp))); + expand_simple_binop (mode, do_floor ? MINUS : PLUS, + xa, tmp, res, 0, OPTAB_DIRECT); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + +/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing + into OPERAND0. */ +void +ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) +{ + /* C code for the stuff we expand below. + double xa = fabs (x), x2; + if (!isless (xa, TWO52)) + return x; + x2 = (double)(long)x; + Compensate. Floor: + if (x2 > x) + x2 -= 1; + Compensate. 
Ceil: + if (x2 < x) + x2 += 1; + return x2; + */ + enum machine_mode mode = GET_MODE (operand0); + rtx xa, xi, TWO52, tmp, label, one, res; + + TWO52 = ix86_gen_TWO52 (mode); + + /* Temporary for holding the result, initialized to the input + operand to ease control flow. */ + res = gen_reg_rtx (mode); + emit_move_insn (res, operand1); + + /* xa = abs (operand1) */ + xa = ix86_expand_sse_fabs (res, NULL); + + /* if (!isless (xa, TWO52)) goto label; */ + label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); + + /* xa = (double)(long)x */ + xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); + expand_fix (xi, res, 0); + expand_float (xa, xi, 0); + + /* generate 1.0 */ + one = force_reg (mode, const_double_from_real_value (dconst1, mode)); + + /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_AND (mode, one, tmp))); + expand_simple_binop (mode, do_floor ? MINUS : PLUS, + xa, tmp, res, 0, OPTAB_DIRECT); + + emit_label (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operand0, res); +} + #include "gt-i386.h" diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 5aaf43b..99ab0db 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17435,34 +17435,59 @@ (define_expand "floordf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" + "(TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size)" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size) + { + if 
(TARGET_64BIT) + ix86_expand_floorceil (operand0, operand1, true); + else + ix86_expand_floorceildf_32 (operand0, operand1, true); + } + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1)); + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + } DONE; }) (define_expand "floorsf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" + "(TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size)" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size) + ix86_expand_floorceil (operand0, operand1, true); + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_floor (op0, op1)); + emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_floor (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + } DONE; }) @@ -17696,34 +17721,59 @@ (define_expand "ceildf2" [(use (match_operand:DF 0 "register_operand" "")) (use (match_operand:DF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" + "(TARGET_USE_FANCY_MATH_387 + && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) + && 
flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size)" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size) + { + if (TARGET_64BIT) + ix86_expand_floorceil (operand0, operand1, false); + else + ix86_expand_floorceildf_32 (operand0, operand1, false); + } + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extenddfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1)); + emit_insn (gen_extenddfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_ceil (op0, op1)); - emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0)); + } DONE; }) (define_expand "ceilsf2" [(use (match_operand:SF 0 "register_operand" "")) (use (match_operand:SF 1 "register_operand" ""))] - "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" + "(TARGET_USE_FANCY_MATH_387 + && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size)" { - rtx op0 = gen_reg_rtx (XFmode); - rtx op1 = gen_reg_rtx (XFmode); + if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH + && !flag_trapping_math + && !optimize_size) + ix86_expand_floorceil (operand0, operand1, false); + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); - emit_insn (gen_extendsfxf2 (op1, operands[1])); - emit_insn (gen_frndintxf2_ceil (op0, op1)); + emit_insn (gen_extendsfxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_ceil (op0, op1)); - emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0)); + } DONE; }) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 
8f5786a..78976ed 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -733,6 +733,16 @@ [(set_attr "type" "ssecmp") (set_attr "mode" "V4SF")]) +(define_insn "sse_maskcmpsf3" + [(set (match_operand:SF 0 "register_operand" "=x") + (match_operator:SF 3 "sse_comparison_operator" + [(match_operand:SF 1 "register_operand" "0") + (match_operand:SF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE" + "cmp%D3ss\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "SF")]) + (define_insn "sse_vmmaskcmpv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF @@ -1718,6 +1728,16 @@ [(set_attr "type" "ssecmp") (set_attr "mode" "V2DF")]) +(define_insn "sse2_maskcmpdf3" + [(set (match_operand:DF 0 "register_operand" "=x") + (match_operator:DF 3 "sse_comparison_operator" + [(match_operand:DF 1 "register_operand" "0") + (match_operand:DF 2 "nonimmediate_operand" "xm")]))] + "TARGET_SSE2" + "cmp%D3sd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecmp") + (set_attr "mode" "DF")]) + (define_insn "sse2_vmmaskcmpv2df3" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2fad67c..55d7f95 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,10 @@ 2006-10-29 Richard Guenther <rguenther@suse.de> + * gcc.target/i386/math-torture/ceil.c: New testcase. + * gcc.target/i386/math-torture/floor.c: Likewise. + +2006-10-29 Richard Guenther <rguenther@suse.de> + * gcc.target/i386/math-torture/rint.c: New testcase. * gcc.target/i386/math-torture/nearbyint.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/i386/math-torture/ceil.c b/gcc/testsuite/gcc.target/i386/math-torture/ceil.c new file mode 100644 index 0000000..dfccd7a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/math-torture/ceil.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ + +float testlf (float x) +{ + return __builtin_ceilf (x); +} +double testl (double x) +{ + return __builtin_ceil (x); +} +long double testll (long double x) +{ + return __builtin_ceill (x); +} + diff --git a/gcc/testsuite/gcc.target/i386/math-torture/floor.c b/gcc/testsuite/gcc.target/i386/math-torture/floor.c new file mode 100644 index 0000000..0c3aa91 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/math-torture/floor.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ + +float testlf (float x) +{ + return __builtin_floorf (x); +} +double testl (double x) +{ + return __builtin_floor (x); +} +long double testll (long double x) +{ + return __builtin_floorl (x); +} +