aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorIlya Enkovich <ilya.enkovich@intel.com>2014-12-03 08:45:45 +0000
committerIlya Enkovich <ienkovich@gcc.gnu.org>2014-12-03 08:45:45 +0000
commit45392c76508048665ea5ad3269b59894724d5cb5 (patch)
treead09dd9e85bd4fb92af58f56cccbbc9c0eb7429a /gcc
parent17adbcebf01689d2f2c5386845398d9aedbe4cb3 (diff)
downloadgcc-45392c76508048665ea5ad3269b59894724d5cb5.zip
gcc-45392c76508048665ea5ad3269b59894724d5cb5.tar.gz
gcc-45392c76508048665ea5ad3269b59894724d5cb5.tar.bz2
constraints.md (Yr): New.
gcc/ * config/i386/constraints.md (Yr): New. * config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. * config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives which use only NO_REX_SSE_REGS. (vec_set<mode>_0): Likewise. (*vec_setv4sf_sse4_1): Likewise. (sse4_1_insertps): Likewise. (*sse4_1_extractps): Likewise. (*sse4_1_mulv2siv2di3<mask_name>): Likewise. (*<sse4_1_avx2>_mul<mode>3<mask_name>): Likewise. (*sse4_1_<code><mode>3<mask_name>): Likewise. (*sse4_1_<code><mode>3): Likewise. (*sse4_1_eqv2di3): Likewise. (sse4_2_gtv2di3): Likewise. (*vec_extractv4si): Likewise. (*vec_concatv2si_sse4_1): Likewise. (vec_concatv2di): Likewise. (<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Likewise. (<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>): Likewise. (<sse4_1>_dp<ssemodesuffix><avxsizesuffix>): Likewise. (<vi8_sse4_1_avx2_avx512>_movntdqa): Likewise. (<sse4_1_avx2>_mpsadbw): Likewise. (<sse4_1_avx2>packusdw<mask_name>): Likewise. (<sse4_1_avx2>_pblendvb): Likewise. (sse4_1_pblendw): Likewise. (sse4_1_phminposuw): Likewise. (sse4_1_<code>v8qiv8hi2<mask_name>): Likewise. (sse4_1_<code>v4qiv4si2<mask_name>): Likewise. (sse4_1_<code>v4hiv4si2<mask_name>): Likewise. (sse4_1_<code>v2qiv2di2<mask_name>): Likewise. (sse4_1_<code>v2hiv2di2<mask_name>): Likewise. (sse4_1_<code>v2siv2di2<mask_name>): Likewise. (sse4_1_ptest): Likewise. (<sse4_1>_round<ssemodesuffix><avxsizesuffix>): Likewise. (sse4_1_round<ssescalarmodesuffix>): Likewise. * config/i386/subst.md (mask_prefix4): New. * config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New. gcc/testsuites/ * gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed vec_concatv2di template. From-SVN: r218303
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog42
-rw-r--r--gcc/config/i386/constraints.md6
-rw-r--r--gcc/config/i386/i386.h3
-rw-r--r--gcc/config/i386/sse.md393
-rw-r--r--gcc/config/i386/subst.md1
-rw-r--r--gcc/config/i386/x86-tune.def4
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c2
8 files changed, 271 insertions, 185 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e3b3477..85a68b9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,45 @@
+2014-12-03 Ilya Enkovich <ilya.enkovich@intel.com>
+
+ * config/i386/constraints.md (Yr): New.
+ * config/i386/i386.h (reg_class): Add NO_REX_SSE_REGS.
+ (REG_CLASS_NAMES): Likewise.
+ (REG_CLASS_CONTENTS): Likewise.
+ * config/i386/sse.md (*vec_concatv2sf_sse4_1): Add alternatives
+ which use only NO_REX_SSE_REGS.
+ (vec_set<mode>_0): Likewise.
+ (*vec_setv4sf_sse4_1): Likewise.
+ (sse4_1_insertps): Likewise.
+ (*sse4_1_extractps): Likewise.
+ (*sse4_1_mulv2siv2di3<mask_name>): Likewise.
+ (*<sse4_1_avx2>_mul<mode>3<mask_name>): Likewise.
+ (*sse4_1_<code><mode>3<mask_name>): Likewise.
+ (*sse4_1_<code><mode>3): Likewise.
+ (*sse4_1_eqv2di3): Likewise.
+ (sse4_2_gtv2di3): Likewise.
+ (*vec_extractv4si): Likewise.
+ (*vec_concatv2si_sse4_1): Likewise.
+ (vec_concatv2di): Likewise.
+ (<sse4_1>_blend<ssemodesuffix><avxsizesuffix>): Likewise.
+ (<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>): Likewise.
+ (<sse4_1>_dp<ssemodesuffix><avxsizesuffix>): Likewise.
+ (<vi8_sse4_1_avx2_avx512>_movntdqa): Likewise.
+ (<sse4_1_avx2>_mpsadbw): Likewise.
+ (<sse4_1_avx2>packusdw<mask_name>): Likewise.
+ (<sse4_1_avx2>_pblendvb): Likewise.
+ (sse4_1_pblendw): Likewise.
+ (sse4_1_phminposuw): Likewise.
+ (sse4_1_<code>v8qiv8hi2<mask_name>): Likewise.
+ (sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
+ (sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
+ (sse4_1_<code>v2qiv2di2<mask_name>): Likewise.
+ (sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
+ (sse4_1_<code>v2siv2di2<mask_name>): Likewise.
+ (sse4_1_ptest): Likewise.
+ (<sse4_1>_round<ssemodesuffix><avxsizesuffix>): Likewise.
+ (sse4_1_round<ssescalarmodesuffix>): Likewise.
+ * config/i386/subst.md (mask_prefix4): New.
+ * config/i386/x86-tune.def (X86_TUNE_AVOID_4BYTE_PREFIXES): New.
+
2014-12-03 Segher Boessenkool <segher@kernel.crashing.org>
PR rtl-optimization/52714
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index b7183a1..c8093f5 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -106,6 +106,8 @@
;; a Integer register when zero extensions with AND are disabled
;; p Integer register when TARGET_PARTIAL_REG_STALL is disabled
;; f x87 register when 80387 floating point arithmetic is enabled
+;; r SSE regs not requiring a REX prefix when prefix avoidance is enabled
+;; and all SSE regs otherwise
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -139,6 +141,10 @@
"(ix86_fpmath & FPMATH_387) ? FLOAT_REGS : NO_REGS"
"@internal Any x87 register when 80387 FP arithmetic is enabled.")
+;; "Yr" restricts an operand to the SSE registers that need no REX prefix
+;; when the avoid-4-byte-prefixes tuning is active, and allows every SSE
+;; register otherwise.  The tuning must be read through
+;; ix86_tune_features[]: X86_TUNE_AVOID_4BYTE_PREFIXES by itself is the
+;; feature's index in that table, not its value for the selected -mtune,
+;; so testing the bare name would not depend on the tuning at all.
+(define_register_constraint "Yr"
+ "TARGET_SSE ? (ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES] ? NO_REX_SSE_REGS : ALL_SSE_REGS) : NO_REGS"
+ "@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
+
;; We use the B prefix to denote any number of internal operands:
;; s Sibcall memory operand, not valid for TARGET_X32
;; w Call memory operand, not valid for TARGET_X32
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index bc76620..df7789d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1311,6 +1311,7 @@ enum reg_class
FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */
FLOAT_REGS,
SSE_FIRST_REG,
+ NO_REX_SSE_REGS,
SSE_REGS,
EVEX_SSE_REGS,
BND_REGS,
@@ -1369,6 +1370,7 @@ enum reg_class
"FP_TOP_REG", "FP_SECOND_REG", \
"FLOAT_REGS", \
"SSE_FIRST_REG", \
+ "NO_REX_SSE_REGS", \
"SSE_REGS", \
"EVEX_SSE_REGS", \
"BND_REGS", \
@@ -1409,6 +1411,7 @@ enum reg_class
{ 0x0200, 0x0, 0x0 }, /* FP_SECOND_REG */ \
{ 0xff00, 0x0, 0x0 }, /* FLOAT_REGS */ \
{ 0x200000, 0x0, 0x0 }, /* SSE_FIRST_REG */ \
+{ 0x1fe00000, 0x000000, 0x0 }, /* NO_REX_SSE_REGS */ \
{ 0x1fe00000, 0x1fe000, 0x0 }, /* SSE_REGS */ \
{ 0x0,0xffe00000, 0x1f }, /* EVEX_SSE_REGS */ \
{ 0x0, 0x0,0x1e000 }, /* BND_REGS */ \
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca5d720..c3aaea3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6338,26 +6338,28 @@
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
- [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
+ [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
(vec_concat:V2SF
- (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
- (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
+ (match_operand:SF 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,m, 0 , m")
+ (match_operand:SF 2 "vector_move_operand" " Yr,*x,x, m,m, m,C,*ym, C")))]
"TARGET_SSE4_1"
"@
unpcklps\t{%2, %0|%0, %2}
+ unpcklps\t{%2, %0|%0, %2}
vunpcklps\t{%2, %1, %0|%0, %1, %2}
insertps\t{$0x10, %2, %0|%0, %2, 0x10}
+ insertps\t{$0x10, %2, %0|%0, %2, 0x10}
vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
%vmovss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
- [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
- (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
- (set_attr "prefix_data16" "*,*,1,*,*,*,*")
- (set_attr "prefix_extra" "*,*,1,1,*,*,*")
- (set_attr "length_immediate" "*,*,1,1,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
- (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
+ [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
+ (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
+ (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
+ (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
+ (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
+ (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
@@ -6405,49 +6407,51 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=v,v,v ,x,x,v,x ,x ,m ,m ,m")
+ "=Yr,*v,v,v ,x,x,v,Yr ,*x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
+ " Yr,*v,m,*r,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
(match_operand:VI4F_128 1 "vector_move_operand"
- " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
+ " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
%vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
+ %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
%vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
%vmovd\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}
vmovss\t{%2, %1, %0|%0, %1, %2}
pinsrd\t{$0, %2, %0|%0, %2, 0}
+ pinsrd\t{$0, %2, %0|%0, %2, 0}
vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
#
#
#"
- [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
+ [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
(set (attr "type")
- (cond [(eq_attr "alternative" "0,6,7")
+ (cond [(eq_attr "alternative" "0,1,7,8,9")
(const_string "sselog")
- (eq_attr "alternative" "9")
+ (eq_attr "alternative" "11")
(const_string "imov")
- (eq_attr "alternative" "10")
+ (eq_attr "alternative" "12")
(const_string "fmov")
]
(const_string "ssemov")))
- (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
- (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
- (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
- (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
+ (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
+ (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
+ (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
;; A subset is vec_setv4sf.
(define_insn "*vec_setv4sf_sse4_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
- (match_operand:V4SF 1 "register_operand" "0,x")
+ (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
+ (match_operand:V4SF 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE4_1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -6457,26 +6461,27 @@
switch (which_alternative)
{
case 0:
- return "insertps\t{%3, %2, %0|%0, %2, %3}";
case 1:
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+ case 2:
return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "V4SF")])
(define_insn "sse4_1_insertps"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:V4SF 1 "register_operand" "0,0,x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
UNSPEC_INSERTPS))]
"TARGET_SSE4_1"
{
@@ -6490,19 +6495,20 @@
switch (which_alternative)
{
case 0:
- return "insertps\t{%3, %2, %0|%0, %2, %3}";
case 1:
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+ case 2:
return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "V4SF")])
(define_split
@@ -6544,13 +6550,14 @@
})
(define_insn_and_split "*sse4_1_extractps"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x,0,x")
- (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
+ (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
"TARGET_SSE4_1"
"@
%vextractps\t{%2, %1, %0|%0, %1, %2}
+ %vextractps\t{%2, %1, %0|%0, %1, %2}
#
#"
"&& reload_completed && SSE_REG_P (operands[0])"
@@ -6575,13 +6582,13 @@
}
DONE;
}
- [(set_attr "isa" "*,noavx,avx")
- (set_attr "type" "sselog,*,*")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "1,*,*")
- (set_attr "length_immediate" "1,*,*")
- (set_attr "prefix" "maybe_vex,*,*")
- (set_attr "mode" "V4SF,*,*")])
+ [(set_attr "isa" "*,*,noavx,avx")
+ (set_attr "type" "sselog,sselog,*,*")
+ (set_attr "prefix_data16" "1,1,*,*")
+ (set_attr "prefix_extra" "1,1,*,*")
+ (set_attr "length_immediate" "1,1,*,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
+ (set_attr "mode" "V4SF,V4SF,*,*")])
(define_insn_and_split "*vec_extractv4sf_mem"
[(set (match_operand:SF 0 "register_operand" "=x,*r,f")
@@ -9553,26 +9560,27 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
- [(set (match_operand:V2DI 0 "register_operand" "=x,v")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
(parallel [(const_int 0) (const_int 2)])))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>
&& ix86_binary_operator_ok (MULT, V4SImode, operands)"
"@
pmuldq\t{%2, %0|%0, %2}
+ pmuldq\t{%2, %0|%0, %2}
vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseimul")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
@@ -9752,19 +9760,20 @@
})
(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
- [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
+ [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
(mult:VI4_AVX512F
- (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
- (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
"TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
"@
pmulld\t{%2, %0|%0, %2}
+ pmulld\t{%2, %0|%0, %2}
vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "<mask_prefix3>")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "prefix" "<mask_prefix4>")
+ (set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "mul<mode>3"
@@ -10241,20 +10250,21 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI14_128 0 "register_operand" "=x,v")
+ [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
(smaxmin:VI14_128
- (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v")
- (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
+ p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_extra" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_extra" "1,1,*")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
@@ -10324,20 +10334,21 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI24_128 0 "register_operand" "=x,v")
+ [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
(umaxmin:VI24_128
- (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v")
- (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
+ p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_extra" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_extra" "1,1,*")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
@@ -10427,18 +10438,19 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*sse4_1_eqv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
(eq:V2DI
- (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
"TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
"@
pcmpeqq\t{%2, %0|%0, %2}
+ pcmpeqq\t{%2, %0|%0, %2}
vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "*sse2_eq<mode>3"
@@ -10474,18 +10486,19 @@
"ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
(define_insn "sse4_2_gtv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
(gt:V2DI
- (match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:V2DI 1 "register_operand" "0,0,x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
"TARGET_SSE4_2"
"@
pcmpgtq\t{%2, %0|%0, %2}
+ pcmpgtq\t{%2, %0|%0, %2}
vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "avx2_gt<mode>3"
@@ -12705,9 +12718,9 @@
"operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
(define_insn "*vec_extractv4si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x,0,x")
+ (match_operand:V4SI 1 "register_operand" "x,0,0,x")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
"TARGET_SSE4_1"
{
@@ -12717,10 +12730,11 @@
return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
case 1:
+ case 2:
operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
return "psrldq\t{%2, %0|%0, %2}";
- case 2:
+ case 3:
operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
@@ -12728,11 +12742,11 @@
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,noavx,avx")
- (set_attr "type" "sselog1,sseishft1,sseishft1")
- (set_attr "prefix_extra" "1,*,*")
+ [(set_attr "isa" "*,noavx,noavx,avx")
+ (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
+ (set_attr "prefix_extra" "1,*,*,*")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,orig,vex")
+ (set_attr "prefix" "maybe_vex,orig,orig,vex")
(set_attr "mode" "TI")])
(define_insn "*vec_extractv4si_zext"
@@ -12839,25 +12853,27 @@
(set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "*vec_concatv2si_sse4_1"
- [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
+ [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y")
(vec_concat:V2SI
- (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
- (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
+ (match_operand:SI 1 "nonimmediate_operand" " 0, 0,x, 0,0, x,rm, 0,rm")
+ (match_operand:SI 2 "vector_move_operand" " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
"TARGET_SSE4_1"
"@
pinsrd\t{$1, %2, %0|%0, %2, 1}
+ pinsrd\t{$1, %2, %0|%0, %2, 1}
vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
punpckldq\t{%2, %0|%0, %2}
+ punpckldq\t{%2, %0|%0, %2}
vpunpckldq\t{%2, %1, %0|%0, %1, %2}
%vmovd\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
- [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
- (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
- (set_attr "prefix_extra" "1,1,*,*,*,*,*")
- (set_attr "length_immediate" "1,1,*,*,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
- (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
+ [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
+ (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
+ (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
+ (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
+ (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
@@ -12900,15 +12916,16 @@
;; movd instead of movq is required to handle broken assemblers.
(define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand"
- "=x,x ,Yi,x ,!x,x,x,x,x,x")
+ "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
(vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand"
- " 0,x ,r ,xm,*y,0,x,0,0,x")
+ " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
(match_operand:DI 2 "vector_move_operand"
- "rm,rm,C ,C ,C ,x,x,x,m,m")))]
+ "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
"TARGET_SSE"
"@
pinsrq\t{$1, %2, %0|%0, %2, 1}
+ pinsrq\t{$1, %2, %0|%0, %2, 1}
vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
* return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
%vmovq\t{%1, %0|%0, %1}
@@ -12918,17 +12935,17 @@
movlhps\t{%2, %0|%0, %2}
movhps\t{%2, %0|%0, %2}
vmovhps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
+ [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
(set (attr "type")
(if_then_else
- (eq_attr "alternative" "0,1,5,6")
+ (eq_attr "alternative" "0,1,2,6,7")
(const_string "sselog")
(const_string "ssemov")))
- (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
- (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
- (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
- (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
- (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+ (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
+ (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
+ (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
+ (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
+ (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
@@ -13968,61 +13985,64 @@
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(vec_merge:VF_128_256
- (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
- (match_operand:VF_128_256 1 "register_operand" "0,x")
+ (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:VF_128_256 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
"TARGET_SSE4_1"
"@
blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "<MODE>")])
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
- [(match_operand:VF_128_256 1 "register_operand" "0,x")
- (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
- (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
+ [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
+ (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<MODE>")])
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
(unspec:VF_128_256
- [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
- (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
- (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
+ (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
UNSPEC_DP))]
"TARGET_SSE4_1"
"@
dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemul")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<MODE>")])
;; Mode attribute used by `vmovntdqa' pattern
@@ -14030,86 +14050,90 @@
[(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
- [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
- (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
+ [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
+ (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
UNSPEC_MOVNTDQA))]
"TARGET_SSE4_1"
"%vmovntdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
- (set_attr "prefix_extra" "1, *")
- (set_attr "prefix" "maybe_vex, evex")
+ (set_attr "prefix_extra" "1,1,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_mpsadbw"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
(unspec:VI1_AVX2
- [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
- (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
- (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
UNSPEC_MPSADBW))]
"TARGET_SSE4_1"
"@
mpsadbw\t{%3, %2, %0|%0, %2, %3}
+ mpsadbw\t{%3, %2, %0|%0, %2, %3}
vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
(vec_concat:VI2_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
"TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packusdw\t{%2, %0|%0, %2}
+ packusdw\t{%2, %0|%0, %2}
vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
(unspec:VI1_AVX2
- [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
- (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
- (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
+ [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
UNSPEC_BLENDV))]
"TARGET_SSE4_1"
"@
pblendvb\t{%3, %2, %0|%0, %2, %3}
+ pblendvb\t{%3, %2, %0|%0, %2, %3}
vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "*,1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "length_immediate" "*,*,1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector,vector,vector")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "sse4_1_pblendw"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
(vec_merge:V8HI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
- (match_operand:V8HI 1 "register_operand" "0,x")
- (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
+ (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
+ (match_operand:V8HI 1 "register_operand" "0,0,x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
"TARGET_SSE4_1"
"@
pblendw\t{%3, %2, %0|%0, %2, %3}
+ pblendw\t{%3, %2, %0|%0, %2, %3}
vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
;; The builtin uses an 8-bit immediate. Expand that.
@@ -14157,8 +14181,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "sse4_1_phminposuw"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
- (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
+ (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
UNSPEC_PHMINPOSUW))]
"TARGET_SSE4_1"
"%vphminposuw\t{%1, %0|%0, %1}"
@@ -14190,10 +14214,10 @@
(set_attr "mode" "XI")])
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -14233,10 +14257,10 @@
(set_attr "mode" "OI")])
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
+ [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
(any_extend:V4SI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -14269,10 +14293,10 @@
(set_attr "mode" "OI")])
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
+ [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
(any_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>"
@@ -14313,10 +14337,10 @@
(set_attr "mode" "OI")])
(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
- [(set (match_operand:V2DI 0 "register_operand" "=v")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
(any_extend:V2DI
(vec_select:V2QI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>"
"%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
@@ -14351,10 +14375,10 @@
(set_attr "mode" "OI")])
(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
- [(set (match_operand:V2DI 0 "register_operand" "=v")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
(any_extend:V2DI
(vec_select:V2HI
- (match_operand:V8HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>"
"%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
@@ -14386,10 +14410,10 @@
(set_attr "mode" "OI")])
(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
- [(set (match_operand:V2DI 0 "register_operand" "=v")
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
(any_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "vm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE4_1 && <mask_avx512vl_condition>"
"%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
@@ -14430,8 +14454,8 @@
(define_insn "sse4_1_ptest"
[(set (reg:CC FLAGS_REG)
- (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
- (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
UNSPEC_PTEST))]
"TARGET_SSE4_1"
"%vptest\t{%1, %0|%0, %1}"
@@ -14441,10 +14465,10 @@
(set_attr "mode" "TI")])
(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
(unspec:VF_128_256
- [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
- (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
UNSPEC_ROUND))]
"TARGET_ROUND"
"%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
@@ -14524,24 +14548,25 @@
})
(define_insn "sse4_1_round<ssescalarmodesuffix>"
- [(set (match_operand:VF_128 0 "register_operand" "=x,x")
+ [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
(vec_merge:VF_128
(unspec:VF_128
- [(match_operand:VF_128 2 "register_operand" "x,x")
- (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
+ [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
UNSPEC_ROUND)
- (match_operand:VF_128 1 "register_operand" "0,x")
+ (match_operand:VF_128 1 "register_operand" "0,0,x")
(const_int 1)))]
"TARGET_ROUND"
"@
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "<MODE>")])
(define_expand "round<mode>2"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 91228c8..d4ce519 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -63,6 +63,7 @@
(define_subst_attr "mask_prefix" "mask" "vex" "evex")
(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+(define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex")
(define_subst_attr "mask_expand_op3" "mask" "3" "5")
(define_subst "mask"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 735e6e5..b5c6e4f 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -395,6 +395,10 @@ DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
+/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
+DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
+ m_SILVERMONT | m_INTEL)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ee812d2..b0d1786 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2014-12-03 Ilya Enkovich <ilya.enkovich@intel.com>
+
+ * gcc.target/i386/sse2-init-v2di-2.c: Adjust to changed
+ vec_concatv2di template.
+
2014-12-03 Segher Boessenkool <segher.kernel.crashing.org>
PR rtl-optimization/52714
diff --git a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
index 0aa5264..b347a4a 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-init-v2di-2.c
@@ -10,4 +10,4 @@ test (long long b)
return _mm_cvtsi64_si128 (b);
}
-/* { dg-final { scan-assembler-times "vec_concatv2di/3" 1 } } */
+/* { dg-final { scan-assembler-times "vec_concatv2di/4" 1 } } */