about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <uros@gcc.gnu.org> 2007-06-06 08:53:29 +0200
committer Uros Bizjak <uros@gcc.gnu.org> 2007-06-06 08:53:29 +0200
commit f28eb39cee72340b8f202c535368887111047bf3 (patch)
tree d67f87e1cfccb0ba58ac15d3d96445d8295ca30e /gcc
parent 69f2880c76592888802df4ab3621b9f32cf1523b (diff)
download gcc-f28eb39cee72340b8f202c535368887111047bf3.zip
gcc-f28eb39cee72340b8f202c535368887111047bf3.tar.gz
gcc-f28eb39cee72340b8f202c535368887111047bf3.tar.bz2
sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm as flags setting insn.
* config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
as flags setting insn.
(sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.

* config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
(UNSPEC_ROUND): New.
("sse4_1_round<mode>2"): New insn pattern.
("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
SSE4.1 targets.
("floor<mode>2"): Rename from floordf2 and floorsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("ceil<mode>2"): Rename from ceildf2 and ceilsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2. Macroize
expander using SSEMODEF mode macro. Expand using
"sse4_1_round<mode>2" pattern for SSE4.1 targets.
* config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
UNSPEC_ROUND instead of UNSPEC_ROUNDP.
("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
UNSPEC_ROUNDS.

From-SVN: r125356
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 33
-rw-r--r-- gcc/config/i386/i386.md 205
-rw-r--r-- gcc/config/i386/sse.md 28
3 files changed, 124 insertions, 142 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 99faa97..937ac42 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2007-06-06 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/sse.md (sse4_2_pcmpestr_cconly): Prefer pcmpestrm
+ as flags setting insn.
+ (sse4_2_pcmpistr_cconly): Prefer pcmpistrm as flags setting insn.
+
+2007-06-06 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (UNSPEC_ROUNDP, UNSPEC_ROUNDS): Remove.
+ (UNSPEC_ROUND): New.
+ ("sse4_1_round<mode>2"): New insn pattern.
+ ("rint<mode>2"): Expand using "sse4_1_round<mode>2" pattern for
+ SSE4.1 targets.
+ ("floor<mode>2"): Rename from floordf2 and floorsf2. Macroize
+ expander using SSEMODEF mode macro. Expand using
+ "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+ ("ceil<mode>2"): Rename from ceildf2 and ceilsf2. Macroize
+ expander using SSEMODEF mode macro. Expand using
+ "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+ ("btrunc<mode>2"): Rename from btruncdf2 and btruncsf2. Macroize
+ expander using SSEMODEF mode macro. Expand using
+ "sse4_1_round<mode>2" pattern for SSE4.1 targets.
+ * config/i386/sse.md ("sse4_1_roundpd", "sse4_1_roundps"): Use
+ UNSPEC_ROUND instead of UNSPEC_ROUNDP.
+ ("sse4_1_roundsd", "sse4_1_roundss"): Use UNSPEC_ROUND instead of
+ UNSPEC_ROUNDS.
+
2007-06-06 Jan Sjodin <jan.sjodin@amd.com>
Sebastian Pop <sebpop@gmail.com>
@@ -53,7 +80,8 @@
* cfgexpand (label_rtx_for_bb): Likewise.
(expand_gimple_basic_block): Likewise.
* cfghooks.c (dump_bb): Likewise.
- (lv_adjust_loop_header_phi): Avoid using C++ keywords as variable names.
+ (lv_adjust_loop_header_phi): Avoid using C++ keywords as
+ variable names.
(lv_add_condition_to_bb): Likewise.
* cfglayout (relink_block_chain): Cast according to the coding
conventions.
@@ -64,7 +92,8 @@
(dump_recorded_exit): Likewise.
* cfgloop.h (enum loop_estimation): Move out of struct scope...
(struct loop): ... from here.
- * cfgloopmanip.c (rpe_enum_p): Cast according to the coding conventions.
+ * cfgloopmanip.c (rpe_enum_p): Cast according to the coding
+ conventions.
* cfgrtl.c (rtl_create_basic_block): Likewise.
(rtl_split_block): Likewise.
(rtl_dump_bb): Likewise.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3e9a15f..43e58ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -171,8 +171,7 @@
(UNSPEC_MPSADBW 138)
(UNSPEC_PHMINPOSUW 139)
(UNSPEC_PTEST 140)
- (UNSPEC_ROUNDP 141)
- (UNSPEC_ROUNDS 142)
+ (UNSPEC_ROUND 141)
; For SSE4.2 support
(UNSPEC_CRC32 143)
@@ -16999,6 +16998,17 @@
})
+(define_insn "sse4_1_round<mode>2"
+ [(set (match_operand:SSEMODEF 0 "register_operand" "=x")
+ (unspec:SSEMODEF [(match_operand:SSEMODEF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_SSE4_1"
+ "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "rintxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
@@ -17018,12 +17028,18 @@
&& flag_unsafe_math_optimizations)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
- && !optimize_size)"
+ && (TARGET_SSE4_1 || !optimize_size))"
{
if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& !flag_trapping_math
- && !optimize_size)
- ix86_expand_rint (operand0, operand1);
+ && (TARGET_SSE4_1 || !optimize_size))
+ {
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x04)));
+ else
+ ix86_expand_rint (operand0, operand1);
+ }
else
{
rtx op0 = gen_reg_rtx (XFmode);
@@ -17044,7 +17060,7 @@
&& !flag_trapping_math && !flag_rounding_math
&& !optimize_size"
{
- if ((<MODE>mode != DFmode) || TARGET_64BIT)
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_round (operand0, operand1);
else
ix86_expand_rounddf_32 (operand0, operand1);
@@ -17250,20 +17266,25 @@
DONE;
})
-(define_expand "floordf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "floor<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x01)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, true);
else
ix86_expand_floorceildf_32 (operand0, operand1, true);
@@ -17273,36 +17294,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_floor (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "floorsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_floorceil (operand0, operand1, true);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_floor (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
@@ -17536,20 +17531,25 @@
DONE;
})
-(define_expand "ceildf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "ceil<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x02)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_floorceil (operand0, operand1, false);
else
ix86_expand_floorceildf_32 (operand0, operand1, false);
@@ -17559,36 +17559,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_ceil (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "ceilsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_floorceil (operand0, operand1, false);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_ceil (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
@@ -17820,20 +17794,25 @@
DONE;
})
-(define_expand "btruncdf2"
- [(use (match_operand:DF 0 "register_operand" ""))
- (use (match_operand:DF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
+(define_expand "btrunc<mode>2"
+ [(use (match_operand:SSEMODEF 0 "register_operand" ""))
+ (use (match_operand:SSEMODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))"
{
- if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_SSE4_1 || !optimize_size))
{
- if (TARGET_64BIT)
+ if (TARGET_SSE4_1)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x03)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
ix86_expand_trunc (operand0, operand1);
else
ix86_expand_truncdf_32 (operand0, operand1);
@@ -17843,36 +17822,10 @@
rtx op0 = gen_reg_rtx (XFmode);
rtx op1 = gen_reg_rtx (XFmode);
- emit_insn (gen_extenddfxf2 (op1, operands[1]));
- emit_insn (gen_frndintxf2_trunc (op0, op1));
-
- emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
- }
- DONE;
-})
-
-(define_expand "btruncsf2"
- [(use (match_operand:SF 0 "register_operand" ""))
- (use (match_operand:SF 1 "register_operand" ""))]
- "((TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
- && flag_unsafe_math_optimizations)
- || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math))
- && !optimize_size"
-{
- if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
- && !flag_trapping_math)
- ix86_expand_trunc (operand0, operand1);
- else
- {
- rtx op0 = gen_reg_rtx (XFmode);
- rtx op1 = gen_reg_rtx (XFmode);
-
- emit_insn (gen_extendsfxf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
emit_insn (gen_frndintxf2_trunc (op0, op1));
- emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
}
DONE;
})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index bdb653d..042146e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6338,7 +6338,7 @@
[(set (match_operand:V2DF 0 "register_operand" "=x")
(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDP))]
+ UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
@@ -6349,7 +6349,7 @@
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
(match_operand:SI 2 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDP))]
+ UNSPEC_ROUND))]
"TARGET_SSE4_1"
"roundps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecvt")
@@ -6361,7 +6361,7 @@
(vec_merge:V2DF
(unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDS)
+ UNSPEC_ROUND)
(match_operand:V2DF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
@@ -6375,7 +6375,7 @@
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
(match_operand:SI 3 "const_0_to_15_operand" "n")]
- UNSPEC_ROUNDS)
+ UNSPEC_ROUND)
(match_operand:V4SF 1 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE4_1"
@@ -6504,14 +6504,14 @@
(match_operand:SI 3 "register_operand" "d,d,d,d")
(match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPESTR))
- (clobber (match_scratch:SI 5 "=c,c,X,X"))
- (clobber (match_scratch:V16QI 6 "=X,X,Y0,Y0"))]
+ (clobber (match_scratch:V16QI 5 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 6 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpestri\t{%4, %2, %0|%0, %2, %4}
- pcmpestri\t{%4, %2, %0|%0, %2, %4}
pcmpestrm\t{%4, %2, %0|%0, %2, %4}
- pcmpestrm\t{%4, %2, %0|%0, %2, %4}"
+ pcmpestrm\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}
+ pcmpestri\t{%4, %2, %0|%0, %2, %4}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
@@ -6613,14 +6613,14 @@
(match_operand:V16QI 1 "nonimmediate_operand" "x,m,x,m")
(match_operand:SI 2 "const_0_to_255_operand" "n,n,n,n")]
UNSPEC_PCMPISTR))
- (clobber (match_scratch:SI 3 "=c,c,X,X"))
- (clobber (match_scratch:V16QI 4 "=X,X,Y0,Y0"))]
+ (clobber (match_scratch:V16QI 3 "=Y0,Y0,X,X"))
+ (clobber (match_scratch:SI 4 "= X, X,c,c"))]
"TARGET_SSE4_2"
"@
- pcmpistri\t{%2, %1, %0|%0, %1, %2}
- pcmpistri\t{%2, %1, %0|%0, %1, %2}
pcmpistrm\t{%2, %1, %0|%0, %1, %2}
- pcmpistrm\t{%2, %1, %0|%0, %1, %2}"
+ pcmpistrm\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}
+ pcmpistri\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")