aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorUros Bizjak <uros@gcc.gnu.org>2012-11-20 19:02:36 +0100
committerUros Bizjak <uros@gcc.gnu.org>2012-11-20 19:02:36 +0100
commit7100b5616c1998a115ad2b398a61c6daed5b3b88 (patch)
tree34852e73dd7aac72581066d216146b17d51f6061 /gcc
parentb20ade36e40a97f2836926dd95cbb5a7d087f58f (diff)
downloadgcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.zip
gcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.tar.gz
gcc-7100b5616c1998a115ad2b398a61c6daed5b3b88.tar.bz2
[multiple changes]
2012-11-20 Uros Bizjak <ubizjak@gmail.com> * config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator. (peephole2 to avoid vector decoded forms): Macroize peephole2 using MODEF mode iterator. Use SWI48 mode iterator instead of SWI48x. 2012-11-20 Uros Bizjak <ubizjak@gmail.com> PR target/19398 * config/i386/i386.md (peephole2 to shorten x87->SSE reload sequences): Remove peephole2. * config/i386/i386.h (enum ix86_tune_indices) <X86_TUNE_SHORTEN_X87_SSE>: Remove. * config/i386/i386.c (initial_ix86_tune_features): Update. 2012-11-20 Vladimir Makarov <vmakarov@redhat.com> PR target/19398 * lra-constraints.c (process_alt_operands): Discourage reloads through secondary memory. testsuite/ChangeLog: 2012-11-20 Uros Bizjak <ubizjak@gmail.com> PR target/19398 * gcc.target/i386/pr19398.c: New test. From-SVN: r193671
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog25
-rw-r--r--gcc/config/i386/i386.c3
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/i386/i386.md62
-rw-r--r--gcc/lra-constraints.c13
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/pr19398.c9
7 files changed, 68 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e022c8a..cc6441c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2012-11-20 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize
+ insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator.
+ (peephole2 to avoid vector decoded forms): Macroize peephole2
+ using MODEF mode iterator. Use SWI48 mode iterator instead of SWI48x.
+
+2012-11-20 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/19398
+ * config/i386/i386.md
+ (peephole2 to shorten x87->SSE reload sequences): Remove peephole2.
+ * config/i386/i386.h (enum ix86_tune_indices)
+ <X86_TUNE_SHORTEN_X87_SSE>: Remove.
+ * config/i386/i386.c (initial_ix86_tune_features): Update.
+
+2012-11-20 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR target/19398
+ * lra-constraints.c (process_alt_operands): Discourage reloads
+ through secondary memory.
+
2012-11-20 David Edelsohn <dje.gcc@gmail.com>
* config/rs6000/rs6000.md (largetoc_low): Revert.
@@ -48,8 +70,7 @@
(v850_function_arg_advance): Likewise.
(v850_print_operand): Handle CONST_INT and CONST_DOUBLE.
(compute_register_save_size): Use df_regs_ever_live_p.
- (increment_stack): Mark prologue adjustments as being frame
- related.
+ (increment_stack): Mark prologue adjustments as being frame related.
(expand_prologue): Handle pretend args. Mark insns generated as
being frame related.
(expand_epilogue): Likewise.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8ce32be..fc75771 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1855,9 +1855,6 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_EXT_80387_CONSTANTS */
m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
- /* X86_TUNE_SHORTEN_X87_SSE */
- ~m_K8,
-
/* X86_TUNE_AVOID_VECTOR_DECODE */
m_CORE2I7_64 | m_K8 | m_GENERIC64,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 0cdbee1..ef62683 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -314,7 +314,6 @@ enum ix86_tune_indices {
X86_TUNE_PAD_RETURNS,
X86_TUNE_PAD_SHORT_FUNCTION,
X86_TUNE_EXT_80387_CONSTANTS,
- X86_TUNE_SHORTEN_X87_SSE,
X86_TUNE_AVOID_VECTOR_DECODE,
X86_TUNE_PROMOTE_HIMODE_IMUL,
X86_TUNE_SLOW_IMUL_IMM32_MEM,
@@ -408,7 +407,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
#define TARGET_EXT_80387_CONSTANTS \
ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
-#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
#define TARGET_AVOID_VECTOR_DECODE \
ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
#define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index aa75d6b..05449ba 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4465,60 +4465,34 @@
"operands[2] = gen_reg_rtx (SImode);")
;; When SSE is available, it is always faster to use it!
-(define_insn "fix_trunc<mode>di_sse"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
- (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
- "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
&& (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "%vcvtt<ssemodesuffix>2si{q}\t{%1, %0|%0, %1}"
+ "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "maybe_vex")
- (set_attr "prefix_rex" "1")
- (set_attr "mode" "<MODE>")
- (set_attr "athlon_decode" "double,vector")
- (set_attr "amdfam10_decode" "double,double")
- (set_attr "bdver1_decode" "double,double")])
-
-(define_insn "fix_trunc<mode>si_sse"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
- "SSE_FLOAT_MODE_P (<MODE>mode)
- && (!TARGET_FISTTP || TARGET_SSE_MATH)"
- "%vcvtt<ssemodesuffix>2si\t{%1, %0|%0, %1}"
- [(set_attr "type" "sseicvt")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "<MODE>")
+ (set (attr "prefix_rex")
+ (if_then_else
+ (match_test "<SWI48:MODE>mode == DImode")
+ (const_string "1")
+ (const_string "*")))
+ (set_attr "mode" "<MODEF:MODE>")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
(set_attr "bdver1_decode" "double,double")])
-;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
-(define_peephole2
- [(set (match_operand:MODEF 0 "register_operand")
- (match_operand:MODEF 1 "memory_operand"))
- (set (match_operand:SWI48x 2 "register_operand")
- (fix:SWI48x (match_dup 0)))]
- "TARGET_SHORTEN_X87_SSE
- && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ())
- && peep2_reg_dead_p (2, operands[0])"
- [(set (match_dup 2) (fix:SWI48x (match_dup 1)))])
-
;; Avoid vector decoded forms of the instruction.
(define_peephole2
- [(match_scratch:DF 2 "x")
- (set (match_operand:SWI48x 0 "register_operand")
- (fix:SWI48x (match_operand:DF 1 "memory_operand")))]
- "TARGET_SSE2 && TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
-
-(define_peephole2
- [(match_scratch:SF 2 "x")
- (set (match_operand:SWI48x 0 "register_operand")
- (fix:SWI48x (match_operand:SF 1 "memory_operand")))]
- "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+ [(match_scratch:MODEF 2 "x")
+ (set (match_operand:SWI48 0 "register_operand")
+ (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
+ "TARGET_AVOID_VECTOR_DECODE
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
+ && optimize_insn_for_speed_p ()"
[(set (match_dup 2) (match_dup 1))
- (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
+ (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
[(set (match_operand:SWI248x 0 "nonimmediate_operand")
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 6f19c18..9df7b97 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1942,6 +1942,19 @@ process_alt_operands (int only_alternative)
if (no_regs_p && REG_P (op))
reject++;
+#ifdef SECONDARY_MEMORY_NEEDED
+ /* If reload requires moving value through secondary
+ memory, it will need one more insn at least. */
+ if (this_alternative != NO_REGS
+ && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
+ && ((curr_static_id->operand[nop].type != OP_OUT
+ && SECONDARY_MEMORY_NEEDED (cl, this_alternative,
+ GET_MODE (op)))
+ || (curr_static_id->operand[nop].type != OP_IN
+ && SECONDARY_MEMORY_NEEDED (this_alternative, cl,
+ GET_MODE (op)))))
+ losers++;
+#endif
/* Input reloads can be inherited more often than output
reloads can be removed, so penalize output
reloads. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index cb65187..0cff709 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2012-11-20 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/19398
+ * gcc.target/i386/pr19398.c: New test.
+
2012-11-20 Martin Jambor <mjambor@suse.cz>
PR tree-optimization/55260
diff --git a/gcc/testsuite/gcc.target/i386/pr19398.c b/gcc/testsuite/gcc.target/i386/pr19398.c
new file mode 100644
index 0000000..60931c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr19398.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -msse -mno-sse3 -mfpmath=387" } */
+
+int test (float a)
+{
+ return (a * a);
+}
+
+/* { dg-final { scan-assembler-not "cvttss2si\[^\\n\]*%xmm" } } */