about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/constraints.md 6
-rw-r--r-- gcc/config/i386/mmx.md 179
-rw-r--r-- gcc/config/i386/sse.md 292
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c 94
4 files changed, 332 insertions, 239 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index a8db33e..eaa582d 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -111,6 +111,8 @@
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
;; target, otherwise any SSE register.
+;; W any EVEX encodable SSE register for AVX512BW target,
+;; otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -151,6 +153,10 @@
"TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
+(define_register_constraint "YW"
+ "TARGET_AVX512BW ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target, otherwise any SSE register.")
+
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c6a2882..4c2b724 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -61,6 +61,9 @@
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
+(define_mode_attr Yv_Yw
+ [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
@@ -1152,10 +1155,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,<Yv_Yw>")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,<Yv_Yw>")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1176,10 +1179,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv")
- (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -1206,9 +1209,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_mulv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
- (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
+ (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
"@
@@ -1234,14 +1237,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(sign_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(sign_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
@@ -1269,14 +1272,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_int 16))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1313,16 +1316,16 @@
"ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
(define_insn "*mmx_pmaddwd"
- [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI
@@ -1432,10 +1435,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
(define_insn "*mmx_<code>v4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(smaxmin:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
@@ -1466,10 +1469,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
(define_insn "*mmx_<code>v8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(umaxmin:V8QI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
@@ -1483,10 +1486,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_ashr<mode>3"
- [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>")
(ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
psra<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1509,10 +1512,10 @@
"TARGET_MMX_WITH_SSE")
(define_insn "mmx_<insn><mode>3"
- [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>")
(any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
- (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
@@ -1549,10 +1552,10 @@
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
(define_insn "*mmx_eq<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(eq:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"@
@@ -1565,10 +1568,10 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_gt<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
(gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,x")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
@@ -1594,19 +1597,20 @@
"operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));")
(define_insn "mmx_andnot<mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pandn\t{%2, %0|%0, %2}
pandn\t{%2, %0|%0, %2}
- vpandn\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vpandn\t{%2, %1, %0|%0, %1, %2}
+ vpandnd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
(define_expand "mmx_<code><mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand")
@@ -1625,20 +1629,21 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*mmx_<code><mode>3"
- [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
- (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<logic>\t{%2, %0|%0, %2}
p<logic>\t{%2, %0|%0, %2}
- vp<logic>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse2_noavx,avx")
- (set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxadd,sselog,sselog")
- (set_attr "mode" "DI,TI,TI")])
+ vp<logic>\t{%2, %1, %0|%0, %1, %2}
+ vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx,avx512vl")
+ (set_attr "mmx_isa" "native,*,*,*")
+ (set_attr "type" "mmxadd,sselog,sselog,sselog")
+ (set_attr "mode" "DI,TI,TI,TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -1652,12 +1657,12 @@
(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_concat:V8QI
(any_s_truncate:V4QI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
(any_s_truncate:V4QI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
@@ -1672,12 +1677,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_packssdw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_concat:V4HI
(ss_truncate:V2HI
- (match_operand:V2SI 1 "register_operand" "0,0,Yv"))
+ (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
(ss_truncate:V2HI
- (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))]
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
packssdw\t{%2, %0|%0, %2}
@@ -1692,11 +1697,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -1715,11 +1720,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklbw"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(vec_select:V8QI
(vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -1738,11 +1743,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpckhwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1759,11 +1764,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn_and_split "mmx_punpcklwd"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(vec_select:V4HI
(vec_concat:V8HI
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yw")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
@@ -1866,11 +1871,11 @@
})
(define_insn "*mmx_pinsrw"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
(vec_merge:V4HI
(vec_duplicate:V4HI
(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
- (match_operand:V4HI 1 "register_operand" "0,0,Yv")
+ (match_operand:V4HI 1 "register_operand" "0,0,YW")
(match_operand:SI 3 "const_int_operand")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@@ -1902,11 +1907,11 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_pinsrb"
- [(set (match_operand:V8QI 0 "register_operand" "=x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=x,YW")
(vec_merge:V8QI
(vec_duplicate:V8QI
(match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
- (match_operand:V8QI 1 "register_operand" "0,Yv")
+ (match_operand:V8QI 1 "register_operand" "0,YW")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -1940,7 +1945,7 @@
(define_insn "*mmx_pextrw"
[(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m")
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1959,7 +1964,7 @@
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
(vec_select:HI
- (match_operand:V4HI 1 "register_operand" "y,Yv")
+ (match_operand:V4HI 1 "register_operand" "y,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
@@ -1976,7 +1981,7 @@
(define_insn "*mmx_pextrb"
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv,Yv")
+ (match_operand:V8QI 1 "register_operand" "YW,YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n,n")])))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"@
@@ -1993,7 +1998,7 @@
[(set (match_operand:SWI248 0 "register_operand" "=r")
(zero_extend:SWI248
(vec_select:QI
- (match_operand:V8QI 1 "register_operand" "Yv")
+ (match_operand:V8QI 1 "register_operand" "YW")
(parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1"
"%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
@@ -2394,15 +2399,15 @@
})
(define_insn "*mmx_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
(truncate:V8QI
(lshiftrt:V8HI
(plus:V8HI
(plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V8HI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)
@@ -2440,15 +2445,15 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "*mmx_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw")
(truncate:V4HI
(lshiftrt:V4SI
(plus:V4SI
(plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))
(const_vector:V4SI [(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
@@ -2483,9 +2488,9 @@
})
(define_insn "mmx_psadbw"
- [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
- (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
- (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")]
UNSPEC_PSADBW))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca4372d..2cd8e04 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -566,7 +566,8 @@
(V4SI "v") (V8SI "v") (V16SI "v")
(V2DI "v") (V4DI "v") (V8DI "v")
(V4SF "v") (V8SF "v") (V16SF "v")
- (V2DF "v") (V4DF "v") (V8DF "v")])
+ (V2DF "v") (V4DF "v") (V8DF "v")
+ (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
@@ -11736,10 +11737,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(sat_plusminus:VI12_AVX2_AVX512BW
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -11827,14 +11828,14 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 16))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
@@ -12128,19 +12129,19 @@
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8SI 0 "register_operand" "=Yw")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -12161,8 +12162,7 @@
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "isa" "*,avx512bw")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -12192,17 +12192,17 @@
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -12217,13 +12217,12 @@
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmaddwd\t{%2, %0|%0, %2}
- vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
@@ -12449,10 +12448,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
(ashiftrt:VI24_AVX2
- (match_operand:VI24_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
psra<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12496,10 +12495,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
(any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
+ (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
"TARGET_SSE2"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -12571,9 +12570,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_<insn><mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
(any_lshift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -12771,20 +12770,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(smaxmin:V8HI
- (match_operand:V8HI 1 "vector_operand" "%0,x,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V8HI 1 "vector_operand" "%0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>w\t{%2, %0|%0, %2}
- vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_expand "<code><mode>3"
@@ -12856,20 +12854,19 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(umaxmin:V16QI
- (match_operand:V16QI 1 "vector_operand" "%0,x,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
+ (match_operand:V16QI 1 "vector_operand" "%0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
p<maxmin_int>b\t{%2, %0|%0, %2}
- vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix_extra" "*,1,1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_extra" "*,1")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -13888,57 +13885,54 @@
})
(define_insn "<sse2_avx2>_packsswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packsswb\t{%2, %0|%0, %2}
- vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packssdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packssdw\t{%2, %0|%0, %2}
- vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packuswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(vec_concat:VI1_AVX512
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packuswb\t{%2, %0|%0, %2}
- vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*,*")
- (set_attr "prefix" "orig,<mask_prefix>,evex")
+ (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
@@ -13986,11 +13980,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 8) (const_int 40)
(const_int 9) (const_int 41)
(const_int 10) (const_int 42)
@@ -14007,18 +14001,18 @@
(const_int 29) (const_int 61)
(const_int 30) (const_int 62)
(const_int 31) (const_int 63)])))]
- "TARGET_AVX2 && <mask_avx512vl_condition>"
+ "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
@@ -14027,7 +14021,7 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
"@
punpckhbw\t{%2, %0|%0, %2}
vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
@@ -14082,11 +14076,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv32qi<mask_name>"
- [(set (match_operand:V32QI 0 "register_operand" "=v")
+ [(set (match_operand:V32QI 0 "register_operand" "=Yw")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "v")
- (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V32QI 1 "register_operand" "Yw")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -14110,11 +14104,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv16qi<mask_name>"
- [(set (match_operand:V16QI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,v")
- (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14162,11 +14156,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_highv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -14182,11 +14176,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_highv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -14230,11 +14224,11 @@
(set_attr "mode" "XI")])
(define_insn "avx2_interleave_lowv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "v")
- (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
+ (match_operand:V16HI 1 "register_operand" "Yw")
+ (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -14250,11 +14244,11 @@
(set_attr "mode" "OI")])
(define_insn "vec_interleave_lowv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,v")
- (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
+ (match_operand:V8HI 1 "register_operand" "0,Yw")
+ (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -15190,9 +15184,9 @@
})
(define_insn "avx2_pshuflw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15264,9 +15258,9 @@
})
(define_insn "sse2_pshuflw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -15347,9 +15341,9 @@
})
(define_insn "avx2_pshufhw_1<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "vm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -15421,9 +15415,9 @@
})
(define_insn "sse2_pshufhw_1<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yw")
(vec_select:V8HI
- (match_operand:V8HI 1 "vector_operand" "vBm")
+ (match_operand:V8HI 1 "vector_operand" "YwBm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -16213,15 +16207,15 @@
})
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
- [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
(truncate:VI12_AVX2_AVX512BW
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
+ (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
(match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
(const_int 1))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
@@ -16238,10 +16232,10 @@
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
- [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
(unspec:VI8_AVX2_AVX512BW
- [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
- (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
+ [(match_operand:<ssebytemode> 1 "register_operand" "0,YW")
+ (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
@@ -16815,12 +16809,12 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "avx2_pmaddubsw256"
- [(set (match_operand:V16HI 0 "register_operand" "=x,v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(ss_plus:V16HI
(mult:V16HI
(zero_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x,v")
+ (match_operand:V32QI 1 "register_operand" "Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16831,7 +16825,7 @@
(const_int 28) (const_int 30)])))
(sign_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
+ (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16863,10 +16857,9 @@
(const_int 29) (const_int 31)]))))))]
"TARGET_AVX2"
"vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,avx512bw")
- (set_attr "type" "sseiadd")
+ [(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex,evex")
+ (set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; The correct representation for this is absolutely enormous, and
@@ -16919,19 +16912,19 @@
(set_attr "mode" "XI")])
(define_insn "ssse3_pmaddubsw128"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
(ss_plus:V8HI
(mult:V8HI
(zero_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "0,x,v")
+ (match_operand:V16QI 1 "register_operand" "0,Yw")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -16952,14 +16945,13 @@
"TARGET_SSSE3"
"@
pmaddubsw\t{%2, %0|%0, %2}
- vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
@@ -17065,16 +17057,16 @@
})
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
@@ -17082,13 +17074,12 @@
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
pmulhrsw\t{%2, %0|%0, %2}
- vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "smulhrsv4hi3"
@@ -17160,21 +17151,20 @@
(set_attr "mode" "DI,TI,TI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
(unspec:VI1_AVX512
- [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
- (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
+ [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
+ (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
pshufb\t{%2, %0|%0, %2}
- vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
@@ -17274,11 +17264,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
- (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>")
+ (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
@@ -17289,19 +17279,18 @@
case 0:
return "palignr\t{%3, %2, %0|%0, %2, %3}";
case 1:
- case 2:
return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "ssse3_palignrdi"
@@ -17367,9 +17356,9 @@
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
- [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
+ [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
(abs:VI1248_AVX512VL_AVX512BW
- (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
+ (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
"TARGET_SSSE3"
"%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
@@ -17731,22 +17720,21 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
(vec_concat:VI2_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
"TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packusdw\t{%2, %0|%0, %2}
packusdw\t{%2, %0|%0, %2}
- vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,noavx,avx,avx512bw")
+ [(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
+ (set_attr "prefix" "orig,orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
@@ -17867,9 +17855,9 @@
(set_attr "mode" "TI")])
(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=v")
+ [(set (match_operand:V16HI 0 "register_operand" "=Yw")
(any_extend:V16HI
- (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
"TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
@@ -17935,10 +17923,10 @@
"TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
+ (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -17952,7 +17940,7 @@
(set_attr "mode" "TI")])
(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
(any_extend:V8HI
(match_operand:V8QI 1 "memory_operand" "m,m,m")))]
"TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c
new file mode 100644
index 0000000..8bb3a03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c
@@ -0,0 +1,94 @@
+/* PR target/99321: assemble-only test; every function below pins its vector operands to %xmm16/%xmm17 and the file is built with AVX512VL enabled but AVX512BW disabled for both the compiler and the assembler (see dg-options). */
+/* { dg-do assemble { target lp64 } } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target assembler_march_noavx512bw } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
+
+#include <x86intrin.h>
+
+typedef unsigned char V1 __attribute__((vector_size (16))); /* 16-byte vector of unsigned char; cast to/from __m128i below. */
+typedef unsigned char V2 __attribute__((vector_size (32))); /* 32-byte vector of unsigned char; cast to/from __m256i below. */
+typedef unsigned short V3 __attribute__((vector_size (16))); /* 16-byte vector of unsigned short; cast to/from __m128i below. */
+typedef unsigned short V4 __attribute__((vector_size (32))); /* 32-byte vector of unsigned short; cast to/from __m256i below. */
+
+void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_abs_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } /* a and b are bound to %xmm16/%xmm17; the first empty asm marks both as written and the last one consumes a, presumably so the intrinsic has to be emitted with its operands in those registers. The same shape is repeated by the functions that follow. */
+void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_abs_epi8 ((__m256i) b); __asm ("" : : "v" (a)); }
+void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_abs_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_abs_epi16 ((__m256i) b); __asm ("" : : "v" (a)); }
+void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f9 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f10 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f11 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f12 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f13 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f14 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f15 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f16 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f17 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f18 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f19 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f20 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f21 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_alignr_epi8 ((__m128i) a, (__m128i) b, 5); __asm ("" : : "v" (a)); }
+void f22 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, 5); __asm ("" : : "v" (a)); }
+void f23 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } /* NOTE(review): same intrinsic and vector type as f16 above, so this adds no new coverage - confirm whether a different intrinsic was intended. */
+void f24 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_avg_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f25 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_avg_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f26 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_avg_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f27 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_avg_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f28 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f29 (void) { register V2 a __asm ("%xmm16"); register V1 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f30 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f31 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+int f32 (void) { register V1 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi8 ((__m128i) a, 3); }
+int f33 (void) { register V3 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi16 ((__m128i) a, 3); }
+void f34 (int c) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_insert_epi8 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); }
+void f35 (int c) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_insert_epi16 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); }
+void f36 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_maddubs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f37 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_maddubs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f38 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_madd_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f39 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_madd_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f40 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f41 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f42 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f43 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
+void f44 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhrs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f45 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhrs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f46 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f47 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f48 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f49 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f50 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_sad_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f51 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_sad_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f52 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_shuffle_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f53 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_shuffle_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f54 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflehi_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); }
+void f55 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflehi_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); }
+void f56 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflelo_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); }
+void f57 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflelo_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); }
+void f58 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_slli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); }
+void f59 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_slli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); }
+void f60 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_srli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); }
+void f61 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_srli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); }
+void f62 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sll_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f63 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sll_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f64 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_slli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
+void f65 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_slli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
+void f66 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srl_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f67 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srl_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f68 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
+void f69 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
+void f70 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sra_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f71 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sra_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f72 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srai_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
+void f73 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srai_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
+void f74 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpackhi_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f75 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpackhi_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f76 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpackhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f77 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpackhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f78 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpacklo_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f79 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpacklo_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f80 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpacklo_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f81 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpacklo_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }