aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Henderson <rth@redhat.com> 2007-02-09 10:17:09 -0800
committer Richard Henderson <rth@gcc.gnu.org> 2007-02-09 10:17:09 -0800
commit ed69105c95c1940e31459755fe931acde845deeb (patch)
tree aeccd61acc2216e7bb90aae7a64a206821b76d0c
parent 8413669bde530c1ea19e990945f3fd11f6226f63 (diff)
download gcc-ed69105c95c1940e31459755fe931acde845deeb.zip
gcc-ed69105c95c1940e31459755fe931acde845deeb.tar.gz
gcc-ed69105c95c1940e31459755fe931acde845deeb.tar.bz2
constraints.md (Ym): New constraint.
* config/i386/constraints.md (Ym): New constraint.
* config/i386/i386.md (movsi_1): Change Y2 to Yi constraints.
(movdi_1_rex64): Split sse and xmm general register moves from
memory move alternatives. Use conditional register constraints.
(movsf_1, movdf_integer): Likewise.
(zero_extendsidi2_32, zero_extendsidi2_rex64): Likewise.
(movdf_integer_rex64): New.
(pushsf_rex64): Fix output constraints.
* config/i386/sse.md (sse2_loadld): Split rm alternative, use Yi.
(sse2_stored): Likewise.
(sse2_storeq_rex64): New.
* config/i386/i386.c (x86_inter_unit_moves): Enable for not
amd and not generic.
(ix86_secondary_memory_needed): Don't bypass TARGET_INTER_UNIT_MOVES
for optimize_size. Remove SF/DFmode hack.

From-SVN: r121767
-rw-r--r-- gcc/ChangeLog 18
-rw-r--r-- gcc/config/i386/constraints.md 5
-rw-r--r-- gcc/config/i386/i386.c 12
-rw-r--r-- gcc/config/i386/i386.md 214
-rw-r--r-- gcc/config/i386/sse.md 25
5 files changed, 215 insertions, 59 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index de6a753..52ed599 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2007-02-09 Richard Henderson <rth@redhat.com>
+
+ * config/i386/constraints.md (Ym): New constraint.
+ * config/i386/i386.md (movsi_1): Change Y2 to Yi constraints.
+ (movdi_1_rex64): Split sse and xmm general register moves from
+ memory move alternatives. Use conditional register constraints.
+ (movsf_1, movdf_integer): Likewise.
+ (zero_extendsidi2_32, zero_extendsidi2_rex64): Likewise.
+ (movdf_integer_rex64): New.
+ (pushsf_rex64): Fix output constraints.
+ * config/i386/sse.md (sse2_loadld): Split rm alternative, use Yi.
+ (sse2_stored): Likewise.
+ (sse2_storeq_rex64): New.
+ * config/i386/i386.c (x86_inter_unit_moves): Enable for not
+ amd and not generic.
+ (ix86_secondary_memory_needed): Don't bypass TARGET_INTER_UNIT_MOVES
+ for optimize_size. Remove SF/DFmode hack.
+
2007-02-09 Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
* config/i386/driver-i386.c: Turn on -mtune=native for AMDFAM10.
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index f5b7c51..8f3e3ce 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -86,6 +86,7 @@
;; We use the Y prefix to denote any number of conditional register sets:
;; 2 SSE2 enabled
;; i SSE2 inter-unit moves enabled
+;; m MMX inter-unit moves enabled
(define_register_constraint "Y2" "TARGET_SSE2 ? SSE_REGS : NO_REGS"
"@internal Any SSE register, when SSE2 is enabled.")
@@ -94,6 +95,10 @@
"TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS"
"@internal Any SSE register, when SSE2 and inter-unit moves are enabled.")
+(define_register_constraint "Ym"
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves are enabled.")
+
;; Integer constant constraints.
(define_constraint "I"
"Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2dddd69..e5ff76e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1161,9 +1161,7 @@ const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
-/* ??? Allowing interunit moves makes it all too easy for the compiler to put
- integer data in xmm registers. Which results in pretty abysmal code. */
-const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
+const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
| m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
@@ -18301,18 +18299,12 @@ ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
/* If the target says that inter-unit moves are more expensive
than moving through memory, then don't generate them. */
- if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
+ if (!TARGET_INTER_UNIT_MOVES)
return true;
/* Between SSE and general, we have moves no larger than word size. */
if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
return true;
-
- /* ??? For the cost of one register reformat penalty, we could use
- the same instructions to move SFmode and DFmode data, but the
- relevant move patterns don't support those alternatives. */
- if (mode == SFmode || mode == DFmode)
- return true;
}
return false;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ffb575a..4f194d4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1187,9 +1187,9 @@
(define_insn "*movsi_1"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Y2,*x")
+ "=r ,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
(match_operand:SI 1 "general_operand"
- "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Y2,*x,r ,m "))]
+ "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2022,35 +2022,41 @@
(define_insn "*movdi_1_rex64"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r,r ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y")
+ "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
(match_operand:DI 1 "general_operand"
- "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))]
+ "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))]
"TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
case TYPE_SSECVT:
- if (which_alternative == 13)
+ if (SSE_REG_P (operands[0]))
return "movq2dq\t{%1, %0|%0, %1}";
else
return "movdq2q\t{%1, %0|%0, %1}";
+
case TYPE_SSEMOV:
if (get_attr_mode (insn) == MODE_TI)
- return "movdqa\t{%1, %0|%0, %1}";
+ return "movdqa\t{%1, %0|%0, %1}";
/* FALLTHRU */
+
case TYPE_MMXMOV:
- /* Moves from and into integer register is done using movd opcode with
- REX prefix. */
+ /* Moves from and into integer register is done using movd
+ opcode with REX prefix. */
if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
- return "movd\t{%1, %0|%0, %1}";
+ return "movd\t{%1, %0|%0, %1}";
return "movq\t{%1, %0|%0, %1}";
+
case TYPE_SSELOG1:
case TYPE_MMXADD:
return "pxor\t%0, %0";
+
case TYPE_MULTI:
return "#";
+
case TYPE_LEA:
return "lea{q}\t{%a1, %0|%0, %a1}";
+
default:
gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
if (get_attr_mode (insn) == MODE_SI)
@@ -2064,13 +2070,13 @@
[(set (attr "type")
(cond [(eq_attr "alternative" "5")
(const_string "mmxadd")
- (eq_attr "alternative" "6,7,8")
+ (eq_attr "alternative" "6,7,8,9,10")
(const_string "mmxmov")
- (eq_attr "alternative" "9")
+ (eq_attr "alternative" "11")
(const_string "sselog1")
- (eq_attr "alternative" "10,11,12")
+ (eq_attr "alternative" "12,13,14,15,16")
(const_string "ssemov")
- (eq_attr "alternative" "13,14")
+ (eq_attr "alternative" "17,18")
(const_string "ssecvt")
(eq_attr "alternative" "4")
(const_string "multi")
@@ -2078,9 +2084,9 @@
(const_string "lea")
]
(const_string "imov")))
- (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*")
- (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*")
- (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")])
+ (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
;; Stores and loads of ax to arbitrary constant address.
;; We fake an second form of instruction to force reload to load address
@@ -2280,7 +2286,7 @@
(set_attr "mode" "SF,SI,SF")])
(define_insn "*pushsf_rex64"
- [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+ [(set (match_operand:SF 0 "push_operand" "=<,<,<")
(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
"TARGET_64BIT"
{
@@ -2320,9 +2326,9 @@
(define_insn "*movsf_1"
[(set (match_operand:SF 0 "nonimmediate_operand"
- "=f,m ,f,r ,m ,x,x,x ,m ,!*y,!rm,!*y")
+ "=f,m,f,r ,m ,x,x,x ,m,*y,m ,*y,Yi,r ,*Ym,r ")
(match_operand:SF 1 "general_operand"
- "fm,f,G ,rmF,Fr,C ,x ,xm,x,rm ,*y ,*y"))]
+ "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y,r ,Yi,r ,*Ym"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2358,12 +2364,11 @@
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
- case 7:
- case 8:
+ case 7: case 8:
return "movss\t{%1, %0|%0, %1}";
- case 9:
- case 10:
+ case 9: case 10:
+ case 12: case 13: case 14: case 15:
return "movd\t{%1, %0|%0, %1}";
case 11:
@@ -2373,7 +2378,7 @@
gcc_unreachable ();
}
}
- [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,9,10")
(const_string "SI")
@@ -2609,13 +2614,139 @@
]
(const_string "DF")))])
+(define_insn "*movdf_integer_rex64"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size
+ && standard_80387_constant_p (operands[1]))
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "movdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "movq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case 9:
+ case 10:
+ return "movd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4,9,10")
+ (const_string "DI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand"
"=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ")
(match_operand:DF 1 "general_operand"
"fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
- && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
+ && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size
@@ -3261,17 +3392,18 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))]
""
- "if (!TARGET_64BIT)
- {
- emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
- DONE;
- }
- ")
+{
+ if (!TARGET_64BIT)
+ {
+ emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
+ DONE;
+ }
+})
(define_insn "zero_extendsidi2_32"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Y2")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,*y,?*Yi,*Y2")
(zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm")))
+ (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT"
"@
@@ -3279,22 +3411,26 @@
#
#
movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}"
- [(set_attr "mode" "SI,SI,SI,DI,TI")
- (set_attr "type" "multi,multi,multi,mmxmov,ssemov")])
+ [(set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")
+ (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")])
(define_insn "zero_extendsidi2_rex64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Y2")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,*y,?*Yi,*Y2")
(zero_extend:DI
- (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))]
+ (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))]
"TARGET_64BIT"
"@
mov\t{%k1, %k0|%k0, %k1}
#
movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}"
- [(set_attr "type" "imovx,imov,mmxmov,ssemov")
- (set_attr "mode" "SI,DI,SI,SI")])
+ [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
(define_split
[(set (match_operand:DI 0 "memory_operand" "")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 50abb8a..ed1de19 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3961,26 +3961,25 @@
"operands[2] = CONST0_RTX (V4SImode);")
(define_insn "sse2_loadld"
- [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
(vec_merge:V4SI
(vec_duplicate:V4SI
- (match_operand:SI 2 "nonimmediate_operand" "mr ,m,x"))
- (match_operand:V4SI 1 "reg_or_0_operand" " C ,C,0")
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
(const_int 1)))]
"TARGET_SSE"
"@
movd\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "TI,V4SF,SF")])
+ (set_attr "mode" "TI,TI,V4SF,SF")])
-;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
-;; be taken into account, and movdi isn't fully populated even without.
(define_insn_and_split "sse2_stored"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "register_operand" "x,Yi")
(parallel [(const_int 0)])))]
"TARGET_SSE"
"#"
@@ -3998,8 +3997,14 @@
"TARGET_SSE"
"")
-;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
-;; be taken into account, and movdi isn't fully populated even without.
+(define_insn "*sse2_storeq_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x,Yi")
+ (parallel [(const_int 0)])))]
+ "TARGET_64BIT && TARGET_SSE"
+ "#")
+
(define_insn "*sse2_storeq"
[(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
(vec_select:DI