aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2007-09-11 13:38:05 +0200
committerJan Hubicka <hubicka@gcc.gnu.org>2007-09-11 11:38:05 +0000
commit630ecd8d109104ecc207dd6c4fb96b02ba477239 (patch)
tree2968bb9daec1e127c158652543d8a6952d1fab0e /gcc
parent7986e000a93bd150c589470b8cb81ad3489fa250 (diff)
downloadgcc-630ecd8d109104ecc207dd6c4fb96b02ba477239.zip
gcc-630ecd8d109104ecc207dd6c4fb96b02ba477239.tar.gz
gcc-630ecd8d109104ecc207dd6c4fb96b02ba477239.tar.bz2
i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
* i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS. (TARGET_INTER_UNIT_CONVERSIONS): New. * i386.md (floatsi expanders): Remove redundant check for SImode source; offload to memory when asked for. (floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse floatdisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse): Update conditions; (floatsisf2_mixed_memory, floatsisf2_sse_memory, floatsidf2_mixed_memory, floatsidf2_sse_memory floatdisf2_mixed_memory, floatsisf2_sse_memory, floatsidf2_mixed_memory, floatsidf2_sse_memory): New. From-SVN: r128369
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/config/i386/i386.c3
-rw-r--r--gcc/config/i386/i386.h3
-rw-r--r--gcc/config/i386/i386.md175
4 files changed, 180 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1706076..cc96242 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
2007-09-11 Jan Hubicka <jh@suse.cz>
+ * i386.h (ix86_tune_indices): Add X86_TUNE_INTER_UNIT_CONVERSIONS.
+ (TARGET_INTER_UNIT_CONVERSIONS): New.
+ * i386.md (floatsi expanders): Remove redundant check for SImode
+ source; offload to memory when asked for.
+ (floatsisf2_mixed, floatsisf2_sse, floatsidf2_mixed, floatsidf2_sse,
+ floatdisf2_mixed, floatdisf2_sse, floatdidf2_mixed, floatdidf2_sse):
+ Update conditions.
+ (floatsisf2_mixed_memory, floatsisf2_sse_memory,
+ floatsidf2_mixed_memory, floatsidf2_sse_memory
+ floatdisf2_mixed_memory, floatsisf2_sse_memory,
+ floatsidf2_mixed_memory, floatsidf2_sse_memory): New.
+
+2007-09-11 Jan Hubicka <jh@suse.cz>
+
* toplev.c (process_options): all frontends now do unit-at-a-time.
* cgraphunit.c: update comments.
(cgraph_expand_function): call passmanager dirrectly; emit thunks.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 57fb628..ecec205 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1376,6 +1376,9 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_INTER_UNIT_MOVES */
~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
+ /* X86_TUNE_INTER_UNIT_CONVERSIONS */
+ ~(m_AMDFAM10),
+
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a14c74b..451df2e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -259,6 +259,7 @@ enum ix86_tune_indices {
X86_TUNE_SHIFT1,
X86_TUNE_USE_FFREEP,
X86_TUNE_INTER_UNIT_MOVES,
+ X86_TUNE_INTER_UNIT_CONVERSIONS,
X86_TUNE_FOUR_JUMP_LIMIT,
X86_TUNE_SCHEDULE,
X86_TUNE_USE_BT,
@@ -336,6 +337,8 @@ extern unsigned int ix86_tune_features[X86_TUNE_LAST];
#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
+#define TARGET_INTER_UNIT_CONVERSIONS\
+ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5b50274..0c625da 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4775,14 +4775,13 @@
"TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"
/* When we use vector converts, we can't have input in memory. */
- if (GET_MODE (operands[0]) == DFmode && GET_MODE (operands[1]) == SImode
+ if (GET_MODE (operands[0]) == DFmode
&& TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (DFmode))
operands[1] = force_reg (SImode, operands[1]);
-
- if (GET_MODE (operands[0]) == SFmode && GET_MODE (operands[1]) == SImode
- && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
- && SSE_FLOAT_MODE_P (SFmode))
+ else if (GET_MODE (operands[0]) == SFmode
+ && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (SFmode))
{
/* When !flag_trapping_math, we handle SImode->SFmode vector
conversions same way as SImode->DFmode.
@@ -4811,6 +4810,19 @@
operands[1] = tmp;
}
}
+ /* Offload operand of cvtsi2ss and cvtsi2sd into memory for
+ !TARGET_INTER_UNIT_CONVERSIONS.
+ It is necessary for the patterns to not accept non-memory operands,
+ as otherwise the store to memory would be optimized out later. */
+ else if (!TARGET_INTER_UNIT_CONVERSIONS
+ && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && !optimize_size
+ && !MEM_P (operands[1]))
+ {
+ rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
")
(define_insn "*floatsisf2_mixed_vector"
@@ -4833,7 +4845,8 @@
[(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
"TARGET_MIX_SSE_I387
- && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+ && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+ || optimize_size)"
"@
fild%z1\t%1
#
@@ -4846,6 +4859,20 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatsisf2_mixed_memory"
+ [(set (match_operand:SF 0 "register_operand" "=f,x")
+ (float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
+ "TARGET_MIX_SSE_I387
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "@
+ fild%z1\t%1
+ cvtsi2ss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "*,double")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatsisf2_sse_vector_nointernunit"
[(set (match_operand:SF 0 "register_operand" "=x")
(float:SF (match_operand:SI 1 "memory_operand" "m")))]
@@ -4907,7 +4934,8 @@
[(set (match_operand:SF 0 "register_operand" "=x,x")
(float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
"TARGET_SSE_MATH
- && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+ && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+ || optimize_size)"
"cvtsi2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
@@ -4915,6 +4943,18 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatsisf2_sse_memory"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (float:SF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_SSE_MATH
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "cvtsi2ss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "double")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatsidf2_mixed_vector"
[(set (match_operand:DF 0 "register_operand" "=x,f,f")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
@@ -4935,7 +4975,8 @@
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
"TARGET_SSE2 && TARGET_MIX_SSE_I387
- && (!TARGET_USE_VECTOR_CONVERTS || !optimize_size)"
+ && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+ || optimize_size)"
"@
fild%z1\t%1
#
@@ -4949,6 +4990,20 @@
(set_attr "amdfam10_decode" "*,*,vector,double,double")
(set_attr "fp_int_src" "true,true,true,true,false")])
+(define_insn "*floatsidf2_mixed_memory"
+ [(set (match_operand:DF 0 "register_operand" "=f,x")
+ (float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "@
+ fild%z1\t%1
+ cvtsi2sd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatsidf2_sse_vector"
[(set (match_operand:DF 0 "register_operand" "=x")
(float:DF (match_operand:SI 1 "register_operand" "x")))]
@@ -4981,7 +5036,8 @@
[(set (match_operand:DF 0 "register_operand" "=x,x,!x")
(float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
"TARGET_SSE2 && TARGET_SSE_MATH
- && (!TARGET_USE_VECTOR_CONVERTS || optimize_size)"
+ && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+ || optimize_size)"
"@
cvtsi2sd\t{%1, %0|%0, %1}
cvtsi2sd\t{%1, %0|%0, %1}
@@ -4992,6 +5048,19 @@
(set_attr "amdfam10_decode" "vector,double,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatsidf2_memory" ; SImode memory source -> DFmode via cvtsi2sd
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (float:DF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && !TARGET_INTER_UNIT_CONVERSIONS
+ && !optimize_size"
+ "cvtsi2sd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatsi<mode>2_i387"
[(set (match_operand:MODEF 0 "register_operand" "=f,f")
(float:MODEF
@@ -5010,12 +5079,23 @@
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
- "")
+{
+ if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+ && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
+ && !optimize_size
+ && !MEM_P (operands[1]))
+ {
+ rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
+})
(define_insn "*floatdisf2_mixed"
[(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
- "TARGET_64BIT && TARGET_MIX_SSE_I387"
+ "TARGET_64BIT && TARGET_MIX_SSE_I387
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
#
@@ -5028,10 +5108,25 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatdisf2_mixed_memory" ; memory-source-only variant of *floatdisf2_mixed
+ [(set (match_operand:SF 0 "register_operand" "=f,x")
+ (float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
+ "TARGET_64BIT && TARGET_MIX_SSE_I387
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "@
+ fild%z1\t%1
+ cvtsi2ss{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "*,double")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatdisf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x,x")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
- "TARGET_64BIT && TARGET_SSE_MATH"
+ "TARGET_64BIT && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"cvtsi2ss{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
@@ -5039,6 +5134,18 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatdisf2_memory"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (float:SF (match_operand:DI 1 "memory_operand" "m")))]
+ "TARGET_64BIT && TARGET_SSE_MATH
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "double")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
(define_expand "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
@@ -5049,12 +5156,22 @@
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
DONE;
}
+ if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+ && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
+ && !optimize_size
+ && !MEM_P (operands[1]))
+ {
+ rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
})
(define_insn "*floatdidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
- "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"@
fild%z1\t%1
#
@@ -5067,10 +5184,25 @@
(set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatdidf2_mixed_memory"
+ [(set (match_operand:DF 0 "register_operand" "=f,x")
+ (float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "@
+ fild%z1\t%1
+ cvtsi2sd{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatdidf2_sse"
[(set (match_operand:DF 0 "register_operand" "=x,x")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
- "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
"cvtsi2sd{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
@@ -5078,11 +5210,24 @@
(set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
+(define_insn "*floatdidf2_sse_memory"
+ [(set (match_operand:DF 0 "register_operand" "=x")
+ (float:DF (match_operand:DI 1 "memory_operand" "m")))]
+ "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+ "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
(define_insn "*floatdi<mode>2_i387"
[(set (match_operand:MODEF 0 "register_operand" "=f,f")
(float:MODEF
(match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
- "TARGET_80387"
+ "TARGET_80387
+ && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
"@
fild%z1\t%1
#"