diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 72 |
2 files changed, 30 insertions, 54 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a94b344..9113d5d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2007-03-12 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.md (fixuns_trunc<mode>hi2): Implement from + fixuns_truncsfhi2 and fixuns_truncdfhi2 using SSEMODEF + mode macro. + (fix_trunc<mode>di_sse): Implement from fix_truncsfdi_sse and + fix_truncdfdi_sse using SSEMODEF mode macro. + (fix_trunc<mode>si_sse): Implement from fix_truncsfsi_sse and + fix_truncdfsi_sse using SSEMODEF mode macro. + (fix_trunc?f?i_sse peephole2): Implement using SSEMODEF mode macro. + (fix_trunc?f?i_sse K8 peephole2): Fix register constraint. + 2007-03-12 Richard Sandiford <richard@codesourcery.com> * config.gcc (i[4567]86-wrs-vxworks, i[4567]86-wrs-vxworksae): Add diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6bbb752..f3fb848 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4396,77 +4396,41 @@ ;; Without these patterns, we'll try the unsigned SI conversion which ;; is complex for SSE, rather than the signed SI conversion, which isn't. -(define_expand "fixuns_truncsfhi2" +(define_expand "fixuns_trunc<mode>hi2" [(set (match_dup 2) - (fix:SI (match_operand:SF 1 "nonimmediate_operand" ""))) + (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" ""))) (set (match_operand:HI 0 "nonimmediate_operand" "") (subreg:HI (match_dup 2) 0))] - "TARGET_SSE_MATH" - "operands[2] = gen_reg_rtx (SImode);") - -(define_expand "fixuns_truncdfhi2" - [(set (match_dup 2) - (fix:SI (match_operand:DF 1 "nonimmediate_operand" ""))) - (set (match_operand:HI 0 "nonimmediate_operand" "") - (subreg:HI (match_dup 2) 0))] - "TARGET_SSE_MATH && TARGET_SSE2" + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "operands[2] = gen_reg_rtx (SImode);") ;; When SSE is available, it is always faster to use it! -(define_insn "fix_truncsfdi_sse" +(define_insn "fix_trunc<mode>di_sse" [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttss2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "SF") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) - -(define_insn "fix_truncdfdi_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttsd2si{q}\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double")]) - -(define_insn "fix_truncsfsi_sse" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttss2si\t{%1, %0|%0, %1}" + (fix:DI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))] + "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "double,vector") (set_attr "amdfam10_decode" "double,double")]) -(define_insn "fix_truncdfsi_sse" +(define_insn "fix_trunc<mode>si_sse" [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))] - "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "cvttsd2si\t{%1, %0|%0, %1}" + (fix:SI (match_operand:SSEMODEF 1 "nonimmediate_operand" "x,xm")))] + "SSE_FLOAT_MODE_P (<MODE>mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") - (set_attr "mode" "DF") + (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "double,vector") (set_attr "amdfam10_decode" "double,double")]) ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. (define_peephole2 - [(set (match_operand:DF 0 "register_operand" "") - (match_operand:DF 1 "memory_operand" "")) - (set (match_operand:SSEMODEI24 2 "register_operand" "") - (fix:SSEMODEI24 (match_dup 0)))] - "!TARGET_K8 - && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] - "") - -(define_peephole2 - [(set (match_operand:SF 0 "register_operand" "") - (match_operand:SF 1 "memory_operand" "")) + [(set (match_operand:SSEMODEF 0 "register_operand" "") + (match_operand:SSEMODEF 1 "memory_operand" "")) (set (match_operand:SSEMODEI24 2 "register_operand" "") (fix:SSEMODEI24 (match_dup 0)))] "!TARGET_K8 @@ -4476,7 +4440,7 @@ ;; Avoid vector decoded forms of the instruction. (define_peephole2 - [(match_scratch:DF 2 "Y") + [(match_scratch:DF 2 "Y2") (set (match_operand:SSEMODEI24 0 "register_operand" "") (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size" |