aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2008-04-02 21:07:27 +0200
committerUros Bizjak <uros@gcc.gnu.org>2008-04-02 21:07:27 +0200
commit7b1980026cceb8cdd46dc796b8be79245366f1f7 (patch)
tree56faf3425a568cd278b27628ce70cd0df4b9f171 /gcc
parentce52c73bed836a449ee3f4e5333c57e86aa8bbc9 (diff)
downloadgcc-7b1980026cceb8cdd46dc796b8be79245366f1f7.zip
gcc-7b1980026cceb8cdd46dc796b8be79245366f1f7.tar.gz
gcc-7b1980026cceb8cdd46dc796b8be79245366f1f7.tar.bz2
i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values in 32bit mode...
* config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values in 32bit mode when XMM registers are available to avoid store forwarding stalls. (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and corresponding post-reload splitters. From-SVN: r133845
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/config/i386/i386.md71
2 files changed, 79 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 703fd01..984ff05 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2008-04-02 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (*float<SSEMODEI24:mode><X87MODEF:mode>2_1):
+ Emit gen_floatdi<X87MODEF:mode>2_i387_with_xmm for DImode values
+ in 32bit mode when XMM registers are available to avoid store
+ forwarding stalls.
+ (floatdi<X87MODEF:mode>2_i387_with_xmm): New insn pattern and
+ corresponding post-reload splitters.
+
2008-04-02 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (bdesc_sse_3arg): Add __builtin_ia32_shufps
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e2d68bb..adeafc2 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4925,7 +4925,21 @@
"&& 1"
[(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
(clobber (match_dup 2))])]
- "operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);")
+{
+ operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+ /* Avoid store forwarding (partial memory) stall penalty
+ by passing DImode value through XMM registers. */
+ if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !optimize_size)
+ {
+ emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+ }
+})
(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
[(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
@@ -5310,6 +5324,61 @@
[(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
"")
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing DImode value through XMM registers.
+
+;; Convert a DImode integer (operand 1, either in memory or in general
+;; registers) to an X87MODEF value in the x87 register operand 0.
+;; Operands 3 and 4 are V4SImode scratch registers in SSE registers and
+;; operand 2 is a DImode memory temporary; all three are only clobbered.
+;; The "#" output template means that no assembly is emitted for this
+;; pattern directly: it has to be replaced by a define_split first.
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF
+ (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_scratch:V4SI 3 "=&x,x"))
+ (clobber (match_scratch:V4SI 4 "=&x,x"))
+ (clobber (match_operand:DI 2 "memory_operand" "=m,m"))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && !optimize_size"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")])
+
+;; Post-reload split for the case where the DImode source (operand 1)
+;; is in registers: the two SImode halves are loaded into the V4SImode
+;; scratch operands 3 and 4 and merged in operand 3, which is then stored
+;; to the DImode memory operand 2; the conversion itself reads operand 2.
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+ (clobber (match_operand:V4SI 3 "register_operand" ""))
+ (clobber (match_operand:V4SI 4 "register_operand" ""))
+ (clobber (match_operand:DI 2 "memory_operand" ""))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && !optimize_size
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+ /* The DImode value arrived in a pair of integer registers
+ (e.g. %edx:%eax). Assemble the 64-bit DImode value in an xmm
+ register: operands[3] is loaded from the SImode subreg at byte
+ offset 0, operands[4] from the one at byte offset 4, and
+ sse2_punpckldq combines the two into operands[3]. */
+ emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+
+ /* Refer to the same register number in DImode, so that the
+ (set (match_dup 2) (match_dup 3)) of the replacement pattern stores
+ it to the DImode memory operand. */
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+;; Post-reload split for the case where the DImode source (operand 1)
+;; is already in memory: the conversion reads operand 1 directly and the
+;; three clobbers are dropped. The clobber operands are numbered
+;; differently here than in the insn pattern (V4SImode registers are 2
+;; and 3, the DImode memory is 4); none of them is referenced by the
+;; replacement pattern.
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+ (clobber (match_operand:V4SI 2 "register_operand" ""))
+ (clobber (match_operand:V4SI 3 "register_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && !optimize_size
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+ "")
+
;; Avoid store forwarding (partial memory) stall penalty by extending
;; SImode value to DImode through XMM register instead of pushing two
;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES