diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2011-08-14 22:02:32 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2011-08-14 22:02:32 +0200 |
commit | f4ab7b998ed27cc52815ed77710fb1e2bcb03dce (patch) | |
tree | e7566e626ff76d3c38f49235ce9927d79eaa70f2 /gcc/config | |
parent | 8533a1cae9bcc18f1d8b56da641f34db37e63547 (diff) | |
download | gcc-f4ab7b998ed27cc52815ed77710fb1e2bcb03dce.zip gcc-f4ab7b998ed27cc52815ed77710fb1e2bcb03dce.tar.gz gcc-f4ab7b998ed27cc52815ed77710fb1e2bcb03dce.tar.bz2 |
i386.c (ix86_expand_round_sse4): New function.
* config/i386/i386.c (ix86_expand_round_sse4): New function.
* config/i386/i386-protos.h (ix86_expand_round_sse4): New prototype.
* config/i386/i386.md (round<mode>2): Use ix86_expand_round_sse4 for TARGET_ROUND.
(rint<mode>2): Simplify TARGET_ROUND check.
(floor<mode>2): Ditto.
(ceil<mode>2): Ditto.
(btrunc<mode>2): Ditto.
From-SVN: r177751
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 46 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 24 |
3 files changed, 60 insertions, 11 deletions
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index c3eb150..7deeae7 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -174,6 +174,7 @@ extern void ix86_expand_lfloorceil (rtx, rtx, bool); extern void ix86_expand_rint (rtx, rtx); extern void ix86_expand_floorceil (rtx, rtx, bool); extern void ix86_expand_floorceildf_32 (rtx, rtx, bool); +extern void ix86_expand_round_sse4 (rtx, rtx); extern void ix86_expand_round (rtx, rtx); extern void ix86_expand_rounddf_32 (rtx, rtx); extern void ix86_expand_trunc (rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index fedb2ca..fe6ccbe 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -32676,6 +32676,52 @@ ix86_expand_round (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } + +/* Expand SSE sequence for computing round + from OP1 storing into OP0 using sse4 round insn. */ +void +ix86_expand_round_sse4 (rtx op0, rtx op1) +{ + enum machine_mode mode = GET_MODE (op0); + rtx e1, e2, e3, res, half, mask; + const struct real_format *fmt; + REAL_VALUE_TYPE pred_half, half_minus_pred_half; + rtx (*gen_round) (rtx, rtx, rtx); + + switch (mode) + { + case SFmode: + gen_round = gen_sse4_1_roundsf2; + break; + case DFmode: + gen_round = gen_sse4_1_rounddf2; + break; + default: + gcc_unreachable (); + } + + /* e1 = fabs(op1) */ + e1 = ix86_expand_sse_fabs (op1, &mask); + + /* load nextafter (0.5, 0.0) */ + fmt = REAL_MODE_FORMAT (mode); + real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); + REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); + + /* e2 = e1 + 0.5 */ + half = force_reg (mode, const_double_from_real_value (pred_half, mode)); + e2 = expand_simple_binop (mode, PLUS, e1, half, NULL_RTX, 0, OPTAB_DIRECT); + + /* e3 = trunc(e2) */ + e3 = gen_reg_rtx (mode); + emit_insn (gen_round (e3, e2, GEN_INT (ROUND_TRUNC))); + + /* res = copysign (e3, op1) */ + res = gen_reg_rtx (mode); + 
ix86_sse_copysign_to_positive (res, e3, op1, mask); + + emit_move_insn (op0, res); +} /* Table of valid machine attributes. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e61b0f4..e7ae397 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14394,11 +14394,11 @@ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math) { - if (!TARGET_ROUND && optimize_insn_for_size_p ()) - FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (ROUND_MXCSR))); + else if (optimize_insn_for_size_p ()) + FAIL; else ix86_expand_rint (operand0, operand1); } @@ -14431,7 +14431,12 @@ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math && !flag_rounding_math) { - if (TARGET_64BIT || (<MODE>mode != DFmode)) + if (TARGET_ROUND) + { + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_expand_round_sse4 (operands[0], operands[1]); + } + else if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_round (operands[0], operands[1]); else ix86_expand_rounddf_32 (operands[0], operands[1]); @@ -14663,14 +14668,13 @@ && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || optimize_insn_for_speed_p ())) + && !flag_trapping_math) { - if (!TARGET_ROUND && optimize_insn_for_size_p ()) - FAIL; if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (ROUND_FLOOR))); + else if (optimize_insn_for_size_p ()) + FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) ix86_expand_floorceil (operand0, operand1, true); else @@ -14922,8 +14926,7 @@ && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && !flag_trapping_math - && (TARGET_ROUND || optimize_insn_for_speed_p ())) + && !flag_trapping_math) { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 @@ -15179,8 +15182,7 @@ && !flag_trapping_math)" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH 
- && !flag_trapping_math - && (TARGET_ROUND || optimize_insn_for_speed_p ())) + && !flag_trapping_math) { if (TARGET_ROUND) emit_insn (gen_sse4_1_round<mode>2 |