diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2008-03-21 21:43:12 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2008-03-21 21:43:12 +0100 |
commit | 7fb1431bfa30ac9d85fb614f786c8076ff407673 (patch) | |
tree | f48dc228641417cc62eedebc0140261cfaf8bac7 /gcc | |
parent | 9e1e64ec2b978e81924000db7d4009b92bb5a638 (diff) | |
download | gcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.zip gcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.tar.gz gcc-7fb1431bfa30ac9d85fb614f786c8076ff407673.tar.bz2 |
re PR target/13958 (Conversion from unsigned to double is painfully slow on P4)
PR target/13958
* config/i386/i386.md ("*floatunssi<mode>2_1"): New pattern with
corresponding post-reload splitters.
("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern
when x87 FP math is selected.
* config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse):
New function prototype.
* config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New
unreachable function to ease macroization of insn patterns.
From-SVN: r133435
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 8 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 73 |
4 files changed, 89 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2f4c076..3d2ad8f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2008-03-21 Uros Bizjak <ubizjak@gmail.com> + + PR target/13958 + * config/i386/i386.md ("*floatunssi<mode2>_1"): New pattern with + corresponding post-reload splitters. + ("floatunssi<mode>2"): Expand to unsigned_float x87 insn pattern + when x87 FP math is selected. + * config/i386/i386-protos.h (ix86_expand_convert_uns_sixf_sse): + New function prototype. + * config/i386/i386.c (ix86_expand_convert_uns_sixf_sse): New + unreachable function to ease macroization of insn patterns. + 2008-03-21 Martin Jambor <mjambor@suse.cz> * tree-data-ref.c (dump_data_dependence_relation): Avoid data diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 8dd203e..ef2e0ff 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -91,6 +91,7 @@ extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx); extern void ix86_split_convert_uns_si_sse (rtx[]); extern void ix86_expand_convert_uns_didf_sse (rtx, rtx); +extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx); extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx); extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx); extern void ix86_expand_convert_sign_didf_sse (rtx, rtx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5bb5494..8ddfa9f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -10903,6 +10903,14 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) ix86_expand_vector_extract (false, target, fp_xmm, 0); } +/* Not used, but eases macroization of patterns. */ +void +ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, + rtx input ATTRIBUTE_UNUSED) +{ + gcc_unreachable (); +} + /* Convert an unsigned SImode value into a DFmode. Only currently used for SSE, but applicable anywhere. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4d4978d..8b0a280 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -5313,13 +5313,76 @@ DONE; }) +;; Avoid store forwarding (partial memory) stall penalty by extending +;; SImode value to DImode through XMM register instead of pushing two +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; targets benefit from this optimization. Also note that fild +;; loads from memory only. + +(define_insn "*floatunssi<mode>2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] +{ + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); +}) + (define_expand "floatunssi<mode>2" - [(use (match_operand:MODEF 0 "register_operand" "")) - (use 
(match_operand:SI 1 "nonimmediate_operand" ""))] - "!TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && TARGET_SSE) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))" { - ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); - DONE; + if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]); + DONE; + } + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (DImode, slot); + } }) (define_expand "floatunsdisf2" |