aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2008-03-21 21:43:12 +0100
committerUros Bizjak <uros@gcc.gnu.org>2008-03-21 21:43:12 +0100
commit7fb1431bfa30ac9d85fb614f786c8076ff407673 (patch)
treef48dc228641417cc62eedebc0140261cfaf8bac7 /gcc
parent9e1e64ec2b978e81924000db7d4009b92bb5a638 (diff)
downloadgcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.zip
gcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.tar.gz
gcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.tar.bz2
re PR target/13958 (Conversion from unsigned to double is painfully slow on P4)
PR target/13958 * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with corresponding post-reload splitters. ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern when x87 FP math is selected. * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse): New function prototype. * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New unreachable function to ease macroization of insn patterns. From-SVN: r133435
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog12
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/i386.c8
-rw-r--r--gcc/config/i386/i386.md73
4 files changed, 89 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2f4c076..3d2ad8f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2008-03-21 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/13958
+ * config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
+ corresponding post-reload splitters.
+ ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
+ when x87 FP math is selected.
+ * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
+ New function prototype.
+ * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
+ unreachable function to ease macroization of insn patterns.
+
2008-03-21 Martin Jambor <mjambor@suse.cz>
* tree-data-ref.c (dump_data_dependence_relation): Avoid data
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 8dd203e..ef2e0ff 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -91,6 +91,7 @@ extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
extern void ix86_split_convert_uns_si_sse (rtx[]);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5bb5494..8ddfa9f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10903,6 +10903,14 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
+/* Not used, but eases macroization of patterns.  The
+   "floatunssi<mode>2" expander in i386.md calls
+   ix86_expand_convert_uns_si<mode>_sse, so a function of that name
+   has to exist for every mode the expander is instantiated for,
+   including the xf instance.  The call there is guarded by
+   SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH, which is
+   presumably never true for XFmode (TODO confirm), hence the body
+   is just gcc_unreachable.  */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+ rtx input ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
/* Convert an unsigned SImode value into a DFmode. Only currently used
for SSE, but applicable anywhere. */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4d4978d..8b0a280 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5313,13 +5313,76 @@
DONE;
})
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization. Also note that fild
+;; loads from memory only.
+
+;; Unsigned SImode -> X87MODEF conversion for 32-bit targets that have
+;; both the x87 and SSE.  Operand 1 is either in an SSE register ("x")
+;; or in memory ("m").  Operand 2 is a DImode memory temporary and
+;; operand 3 an SImode SSE scratch; the scratch is only required for
+;; the memory alternative ("X" in the register alternative accepts
+;; anything).  The "#" template means no assembly is output for this
+;; insn directly: it has to be split, which the two post-reload
+;; splitters below take care of.
+(define_insn "*floatunssi<mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+ (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+ (clobber (match_scratch:SI 3 "=X,x"))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+;; Post-reload split for a register source: view the SImode source
+;; register as DImode (subreg at byte offset 0), store it to the DImode
+;; memory temporary (operand 2), then convert that temporary with a
+;; signed DImode -> X87MODEF float.
+;; NOTE(review): the result is only the unsigned value if the upper
+;; 32 bits of the stored DImode quantity are zero -- presumably
+;; guaranteed by how the SImode value got into the SSE register;
+;; confirm.
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+ "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+;; Post-reload split for a memory source: first move the SImode value
+;; into the scratch register (operand 3), then store that scratch,
+;; viewed as DImode, to the DImode memory temporary (operand 2) and
+;; convert the temporary with a signed DImode -> X87MODEF float.
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "memory_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+{
+ /* Load the SImode source from memory into the scratch register.  */
+ emit_move_insn (operands[3], operands[1]);
+ /* From here on refer to the scratch as DImode, so that the first
+    set in the replacement template stores all 64 bits to operands[2].  */
+ operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
+;; Expand an unsigned SImode -> X87MODEF conversion on 32-bit targets.
+;; If the mode is handled by SSE math, the C helper emits the whole
+;; sequence and the expander finishes with DONE.  Otherwise a DImode
+;; stack temporary is allocated for operand 2 and the parallel in the
+;; template is emitted as is (presumably to be matched by
+;; "*floatunssi<mode>2_1").
(define_expand "floatunssi<mode>2"
- [(use (match_operand:MODEF 0 "register_operand" ""))
- (use (match_operand:SI 1 "nonimmediate_operand" ""))]
- "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ [(parallel
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_scratch:SI 3 ""))])]
+ "!TARGET_64BIT
+ && ((TARGET_80387 && TARGET_SSE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
{
- ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
- DONE;
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ {
+ ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+ DONE;
+ }
+ else
+ {
+ /* x87 path: operands[2] is the (match_dup 2) clobbered in the
+    template above, i.e. the DImode memory temporary.  */
+ int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+ operands[2] = assign_386_stack_local (DImode, slot);
+ }
})
(define_expand "floatunsdisf2"