diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2012-11-20 19:02:36 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2012-11-20 19:02:36 +0100 |
commit | 7100b5616c1998a115ad2b398a61c6daed5b3b88 (patch) | |
tree | 34852e73dd7aac72581066d216146b17d51f6061 /gcc | |
parent | b20ade36e40a97f2836926dd95cbb5a7d087f58f (diff) | |
download | gcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.zip gcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.tar.gz gcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.tar.bz2 |
[multiple changes]
2012-11-20 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize
insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator.
(peephole2 to avoid vector decoded forms): Macroize peephole2
using MODEF mode iterator. Use SWI48 mode iterator instead of SWI48x.
2012-11-20 Uros Bizjak <ubizjak@gmail.com>
PR target/19398
* config/i386/i386.md
(peephole2 to shorten x87->SSE reload sequences): Remove peephole2.
* config/i386/i386.h (enum ix86_tune_indices)
<X86_TUNE_SHORTEN_X87_SSE>: Remove.
(TARGET_SHORTEN_X87_SSE): Remove.
* config/i386/i386.c (initial_ix86_tune_features): Update.
2012-11-20 Vladimir Makarov <vmakarov@redhat.com>
PR target/19398
* lra-constraints.c (process_alt_operands): Discourage reloads
through secondary memory.
testsuite/ChangeLog:
2012-11-20 Uros Bizjak <ubizjak@gmail.com>
PR target/19398
* gcc.target/i386/pr19398.c: New test.
From-SVN: r193671
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 25 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 62 | ||||
-rw-r--r-- | gcc/lra-constraints.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr19398.c | 9 |
7 files changed, 68 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e022c8a..cc6441c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2012-11-20 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize + insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator. + (peephole2 to avoid vector decoded forms): Macroize peephole2 + using MODEF mode iterator. Use SWI48 mode iterator instead of SWI48x. + +2012-11-20 Uros Bizjak <ubizjak@gmail.com> + + PR target/19398 + * config/i386/i386.md + (peephole2 to shorten x87->SSE reload sequences): Remove peephole2. + * config/i386/i386.h (enum ix86_tune_indices) + <IX86_TUNE_SHORTEN_X87_SSE>: Remove. + * config/i386/i386.h (initial_ix86_tune_features): Update. + +2012-11-20 Vladimir Makarov <vmakarov@redhat.com> + + PR target/19398 + * lra-constraints.c (process_alt_operands): Discourage reloads + through secodnary memory. + 2012-11-20 David Edelsohn <dje.gcc@gmail.com> * config/rs6000/rs6000.md (largetoc_low): Revert. @@ -48,8 +70,7 @@ (v850_function_arg_advance): Likewise. (v850_print_operand): Handle CONST_INT and CONST_DOUBLE. (compute_register_save_size): Use df_regs_ever_live_p. - (increment_stack): Mark prologue adjustments as being frame - related. + (increment_stack): Mark prologue adjustments as being frame related. (expand_prologue): Handle pretend args. Mark insns generated as being frame related. (expand_epilogue): Likewise. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 8ce32be..fc75771 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1855,9 +1855,6 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_EXT_80387_CONSTANTS */ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC, - /* X86_TUNE_SHORTEN_X87_SSE */ - ~m_K8, - /* X86_TUNE_AVOID_VECTOR_DECODE */ m_CORE2I7_64 | m_K8 | m_GENERIC64, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 0cdbee1..ef62683 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -314,7 +314,6 @@ enum ix86_tune_indices { X86_TUNE_PAD_RETURNS, X86_TUNE_PAD_SHORT_FUNCTION, X86_TUNE_EXT_80387_CONSTANTS, - X86_TUNE_SHORTEN_X87_SSE, X86_TUNE_AVOID_VECTOR_DECODE, X86_TUNE_PROMOTE_HIMODE_IMUL, X86_TUNE_SLOW_IMUL_IMM32_MEM, @@ -408,7 +407,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION] #define TARGET_EXT_80387_CONSTANTS \ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS] -#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE] #define TARGET_AVOID_VECTOR_DECODE \ ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE] #define TARGET_TUNE_PROMOTE_HIMODE_IMUL \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index aa75d6b..05449ba 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4465,60 +4465,34 @@ "operands[2] = gen_reg_rtx (SImode);") ;; When SSE is available, it is always faster to use it! 
-(define_insn "fix_trunc<mode>di_sse" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] - "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) +(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "%vcvtt<ssemodesuffix>2si{q}\t{%1, %0|%0, %1}" + "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}" [(set_attr "type" "sseicvt") (set_attr "prefix" "maybe_vex") - (set_attr "prefix_rex" "1") - (set_attr "mode" "<MODE>") - (set_attr "athlon_decode" "double,vector") - (set_attr "amdfam10_decode" "double,double") - (set_attr "bdver1_decode" "double,double")]) - -(define_insn "fix_trunc<mode>si_sse" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] - "SSE_FLOAT_MODE_P (<MODE>mode) - && (!TARGET_FISTTP || TARGET_SSE_MATH)" - "%vcvtt<ssemodesuffix>2si\t{%1, %0|%0, %1}" - [(set_attr "type" "sseicvt") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "<MODE>") + (set (attr "prefix_rex") + (if_then_else + (match_test "<SWI48:MODE>mode == DImode") + (const_string "1") + (const_string "*"))) + (set_attr "mode" "<MODEF:MODE>") (set_attr "athlon_decode" "double,vector") (set_attr "amdfam10_decode" "double,double") (set_attr "bdver1_decode" "double,double")]) -;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. 
-(define_peephole2 - [(set (match_operand:MODEF 0 "register_operand") - (match_operand:MODEF 1 "memory_operand")) - (set (match_operand:SWI48x 2 "register_operand") - (fix:SWI48x (match_dup 0)))] - "TARGET_SHORTEN_X87_SSE - && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()) - && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) (fix:SWI48x (match_dup 1)))]) - ;; Avoid vector decoded forms of the instruction. (define_peephole2 - [(match_scratch:DF 2 "x") - (set (match_operand:SWI48x 0 "register_operand") - (fix:SWI48x (match_operand:DF 1 "memory_operand")))] - "TARGET_SSE2 && TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SWI48x (match_dup 2)))]) - -(define_peephole2 - [(match_scratch:SF 2 "x") - (set (match_operand:SWI48x 0 "register_operand") - (fix:SWI48x (match_operand:SF 1 "memory_operand")))] - "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(match_scratch:MODEF 2 "x") + (set (match_operand:SWI48 0 "register_operand") + (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))] + "TARGET_AVOID_VECTOR_DECODE + && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) + && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SWI48x (match_dup 2)))]) + (set (match_dup 0) (fix:SWI48 (match_dup 2)))]) (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1" [(set (match_operand:SWI248x 0 "nonimmediate_operand") diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 6f19c18..9df7b97 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -1942,6 +1942,19 @@ process_alt_operands (int only_alternative) if (no_regs_p && REG_P (op)) reject++; +#ifdef SECONDARY_MEMORY_NEEDED + /* If reload requires moving value through secondary + memory, it will need one more insn at least. 
*/ + if (this_alternative != NO_REGS + && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS + && ((curr_static_id->operand[nop].type != OP_OUT + && SECONDARY_MEMORY_NEEDED (cl, this_alternative, + GET_MODE (op))) + || (curr_static_id->operand[nop].type != OP_IN + && SECONDARY_MEMORY_NEEDED (this_alternative, cl, + GET_MODE (op))))) + losers++; +#endif /* Input reloads can be inherited more often than output reloads can be removed, so penalize output reloads. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb65187..0cff709 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-11-20 Uros Bizjak <ubizjak@gmail.com> + + PR target/19398 + * gcc.target/i386/pr19398.c: New test. + 2012-11-20 Martin Jambor <mjambor@suse.cz> PR tree-optimization/55260 diff --git a/gcc/testsuite/gcc.target/i386/pr19398.c b/gcc/testsuite/gcc.target/i386/pr19398.c new file mode 100644 index 0000000..60931c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr19398.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-Os -msse -mno-sse3 -mfpmath=387" } */ + +int test (float a) +{ + return (a * a); +} + +/* { dg-final { scan-assembler-not "cvttss2si\[^\\n\]*%xmm" } } */ |