diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/constraints.md | 6 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 179 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 292 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c | 94 |
4 files changed, 332 insertions, 239 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index a8db33e..eaa582d 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -111,6 +111,8 @@ ;; otherwise any SSE register ;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL ;; target, otherwise any SSE register. +;; W any EVEX encodable SSE register for AVX512BW target, +;; otherwise any SSE register. (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -151,6 +153,10 @@ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.") +(define_register_constraint "YW" + "TARGET_AVX512BW ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target, otherwise any SSE register.") + ;; We use the B prefix to denote any number of internal operands: ;; f FLAGS_REG ;; g GOT memory operand. diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index c6a2882..4c2b724 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -61,6 +61,9 @@ (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +(define_mode_attr Yv_Yw + [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Move patterns @@ -1152,10 +1155,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*mmx_<insn><mode>3" - [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,<Yv_Yw>") (plusminus:MMXMODEI8 - (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,Yv") - (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "<comm>0,0,<Yv_Yw>") + (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,<Yv_Yw>")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ @@ -1176,10 +1179,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*mmx_<insn><mode>3" - [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw") (sat_plusminus:MMXMODE12 - (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yv") - (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODE12 1 "register_mmxmem_operand" "<comm>0,0,Yw") + (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ @@ -1206,9 +1209,9 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") - (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") + (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (MULT, V4HImode, operands)" "@ @@ -1234,14 +1237,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")) (sign_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_int 16))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (MULT, V4HImode, operands)" @@ -1269,14 +1272,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")) (zero_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_int 16))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) @@ -1313,16 +1316,16 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw") (plus:V2SI (mult:V2SI (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw") (parallel [(const_int 0) (const_int 2)])))) (mult:V2SI (sign_extend:V2SI @@ -1432,10 +1435,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);") (define_insn "*mmx_<code>v4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (smaxmin:V4HI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (<CODE>, V4HImode, operands)" @@ -1466,10 +1469,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);") (define_insn "*mmx_<code>v8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (umaxmin:V8QI - (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (<CODE>, V8QImode, operands)" @@ -1483,10 +1486,10 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_ashr<mode>3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,<Yv_Yw>") (ashiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv") - (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + (match_operand:MMXMODE24 1 "register_operand" "0,0,<Yv_Yw>") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ psra<mmxvecsize>\t{%2, %0|%0, %2} @@ -1509,10 +1512,10 @@ "TARGET_MMX_WITH_SSE") (define_insn "mmx_<insn><mode>3" - [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,<Yv_Yw>") (any_lshift:MMXMODE248 - (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv") - (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + (match_operand:MMXMODE248 1 "register_operand" "0,0,<Yv_Yw>") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,<Yv_Yw>N")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ p<vshift><mmxvecsize>\t{%2, %0|%0, %2} @@ -1549,10 +1552,10 @@ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") (define_insn "*mmx_eq<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x") (eq:MMXMODEI - (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" "@ @@ -1565,10 +1568,10 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_gt<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x") (gt:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_operand" "0,0,x") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ pcmpgt<mmxvecsize>\t{%2, %0|%0, %2} @@ -1594,19 +1597,20 @@ "operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));") (define_insn "mmx_andnot<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v") (and:MMXMODEI - (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")) - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v")) + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ pandn\t{%2, %0|%0, %2} pandn\t{%2, %0|%0, %2} - vpandn\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,sse2_noavx,avx") - (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxadd,sselog,sselog") - (set_attr "mode" "DI,TI,TI")]) + vpandn\t{%2, %1, %0|%0, %1, %2} + vpandnd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") + (set_attr "mmx_isa" "native,*,*,*") + (set_attr "type" "mmxadd,sselog,sselog,sselog") + (set_attr "mode" "DI,TI,TI,TI")]) (define_expand "mmx_<code><mode>3" [(set (match_operand:MMXMODEI 0 "register_operand") @@ -1625,20 +1629,21 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*mmx_<code><mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v") (any_logic:MMXMODEI - (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<logic>\t{%2, %0|%0, %2} p<logic>\t{%2, %0|%0, %2} - vp<logic>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,sse2_noavx,avx") - (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxadd,sselog,sselog") - (set_attr "mode" "DI,TI,TI")]) + vp<logic>\t{%2, %1, %0|%0, %1, %2} + vp<logic>d\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") + (set_attr "mmx_isa" "native,*,*,*") + (set_attr "type" "mmxadd,sselog,sselog,sselog") + (set_attr "mode" "DI,TI,TI,TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -1652,12 +1657,12 @@ (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")]) (define_insn_and_split "mmx_pack<s_trunsuffix>swb" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_concat:V8QI (any_s_truncate:V4QI - (match_operand:V4HI 1 "register_operand" "0,0,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw")) (any_s_truncate:V4QI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ pack<s_trunsuffix>swb\t{%2, %0|%0, %2} @@ -1672,12 +1677,12 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_concat:V4HI (ss_truncate:V2HI - (match_operand:V2SI 1 "register_operand" "0,0,Yv")) + (match_operand:V2SI 1 "register_operand" "0,0,Yw")) (ss_truncate:V2HI - (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ packssdw\t{%2, %0|%0, %2} @@ -1692,11 +1697,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_select:V8QI (vec_concat:V16QI - (match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) (const_int 6) (const_int 14) @@ -1715,11 +1720,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_select:V8QI (vec_concat:V16QI - (match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 2) (const_int 10) @@ -1738,11 +1743,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_select:V4HI (vec_concat:V8HI - (match_operand:V4HI 1 "register_operand" "0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] "TARGET_MMX || TARGET_MMX_WITH_SSE" @@ -1759,11 +1764,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_select:V4HI (vec_concat:V8HI - (match_operand:V4HI 1 "register_operand" "0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "TARGET_MMX || TARGET_MMX_WITH_SSE" @@ -1866,11 +1871,11 @@ }) (define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW") (vec_merge:V4HI (vec_duplicate:V4HI (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm")) - (match_operand:V4HI 1 "register_operand" "0,0,Yv") + (match_operand:V4HI 1 "register_operand" "0,0,YW") (match_operand:SI 3 "const_int_operand")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) @@ -1902,11 +1907,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_pinsrb" - [(set (match_operand:V8QI 0 "register_operand" "=x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=x,YW") (vec_merge:V8QI (vec_duplicate:V8QI (match_operand:QI 2 "nonimmediate_operand" "rm,rm")) - (match_operand:V8QI 1 "register_operand" "0,Yv") + (match_operand:V8QI 1 "register_operand" "0,YW") (match_operand:SI 3 "const_int_operand")))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -1940,7 +1945,7 @@ (define_insn "*mmx_pextrw" [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m") (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y,Yv,Yv") + (match_operand:V4HI 1 "register_operand" "y,YW,YW") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" @@ -1959,7 +1964,7 @@ [(set (match_operand:SWI48 0 "register_operand" "=r,r") (zero_extend:SWI48 (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y,Yv") + (match_operand:V4HI 1 "register_operand" "y,YW") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" @@ -1976,7 +1981,7 @@ (define_insn "*mmx_pextrb" [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m") (vec_select:QI - (match_operand:V8QI 1 "register_operand" "Yv,Yv") + (match_operand:V8QI 1 "register_operand" "YW,YW") (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n,n")])))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" "@ @@ -1993,7 +1998,7 @@ [(set (match_operand:SWI248 0 "register_operand" "=r") (zero_extend:SWI248 (vec_select:QI - (match_operand:V8QI 1 "register_operand" "Yv") + (match_operand:V8QI 1 "register_operand" "YW") (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" @@ -2394,15 +2399,15 @@ }) (define_insn "*mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (truncate:V8QI (lshiftrt:V8HI (plus:V8HI (plus:V8HI (zero_extend:V8HI - (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")) (zero_extend:V8HI - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) @@ -2440,15 +2445,15 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (plus:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")) (zero_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1))))] @@ -2483,9 +2488,9 @@ }) (define_insn "mmx_psadbw" - [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") - (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw") + (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")] UNSPEC_PSADBW))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ca4372d..2cd8e04 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -566,7 +566,8 @@ (V4SI "v") (V8SI "v") (V16SI "v") (V2DI "v") (V4DI "v") (V8DI "v") (V4SF "v") (V8SF "v") (V16SF "v") - (V2DF "v") (V4DF "v") (V8DF "v")]) + (V2DF "v") (V4DF "v") (V8DF "v") + (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")]) (define_mode_attr sse2_avx_avx512f [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") @@ -11736,10 +11737,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>" - [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>") (sat_plusminus:VI12_AVX2_AVX512BW - (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v") - (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))] + (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>") + (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ @@ -11827,14 +11828,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") (define_insn "*<s>mul<mode>3_highpart<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>") (truncate:VI2_AVX2 (lshiftrt:<ssedoublemode> (mult:<ssedoublemode> (any_extend:<ssedoublemode> - (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")) + (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")) (any_extend:<ssedoublemode> - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm"))) + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m"))) (const_int 16))))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2])) && <mask_mode512bit_condition> && <mask_avx512bw_condition>" @@ -12128,19 +12129,19 @@ "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") (define_insn "*avx2_pmaddwd" - [(set (match_operand:V8SI 0 "register_operand" "=x,v") + [(set (match_operand:V8SI 0 "register_operand" "=Yw") (plus:V8SI (mult:V8SI (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 1 "nonimmediate_operand" "%x,v") + (match_operand:V16HI 1 "nonimmediate_operand" "%Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -12161,8 +12162,7 @@ "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") - (set_attr "isa" "*,avx512bw") - (set_attr "prefix" "vex,evex") + (set_attr "prefix" "vex") (set_attr "mode" "OI")]) (define_expand "sse2_pmaddwd" @@ -12192,17 +12192,17 @@ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") (define_insn "*sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,v") + [(set (match_operand:V4SI 0 "register_operand" "=x,Yw") (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "vector_operand" "%0,x,v") + (match_operand:V8HI 1 "vector_operand" "%0,Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)])))) (mult:V4SI @@ -12217,13 +12217,12 @@ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pmaddwd\t{%2, %0|%0, %2} - vpmaddwd\t{%2, %1, %0|%0, %1, %2} vpmaddwd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "avx512dq_mul<mode>3<mask_name>" @@ -12449,10 +12448,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "ashr<mode>3" - [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>") (ashiftrt:VI24_AVX2 - (match_operand:VI24_AVX2 1 "register_operand" "0,x") - (match_operand:DI 2 "nonmemory_operand" "xN,xN")))] + (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>") + (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))] "TARGET_SSE2" "@ psra<ssemodesuffix>\t{%2, %0|%0, %2} @@ -12496,10 +12495,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<insn><mode>3" - [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>") (any_lshift:VI248_AVX2 - (match_operand:VI248_AVX2 1 "register_operand" "0,x") - (match_operand:DI 2 "nonmemory_operand" "xN,xN")))] + (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>") + (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))] "TARGET_SSE2" "@ p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} @@ -12571,9 +12570,9 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse2_avx2>_<insn><mode>3" - [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw") (any_lshift:VIMAX_AVX2 - (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") + (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" { @@ -12771,20 +12770,19 @@ (set_attr "mode" "TI")]) (define_insn "*<code>v8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (smaxmin:V8HI - (match_operand:V8HI 1 "vector_operand" "%0,x,v") - (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))] + (match_operand:V8HI 1 "vector_operand" "%0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ p<maxmin_int>w\t{%2, %0|%0, %2} - vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2} vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "*,1,1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_expand "<code><mode>3" @@ -12856,20 +12854,19 @@ (set_attr "mode" "TI")]) (define_insn "*<code>v16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (umaxmin:V16QI - (match_operand:V16QI 1 "vector_operand" "%0,x,v") - (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))] + (match_operand:V16QI 1 "vector_operand" "%0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ p<maxmin_int>b\t{%2, %0|%0, %2} - vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2} vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "*,1,1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -13888,57 +13885,54 @@ }) (define_insn "<sse2_avx2>_packsswb<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>") (vec_concat:VI1_AVX512 (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>")) (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packsswb\t{%2, %0|%0, %2} - vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,<mask_prefix>,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse2_avx2>_packssdw<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>") (vec_concat:VI2_AVX2 (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>")) (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packssdw\t{%2, %0|%0, %2} - vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,<mask_prefix>,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse2_avx2>_packuswb<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>") (vec_concat:VI1_AVX512 (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>")) (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packuswb\t{%2, %0|%0, %2} - vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,<mask_prefix>,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx512bw_interleave_highv64qi<mask_name>" @@ -13986,11 +13980,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_highv32qi<mask_name>" - [(set (match_operand:V32QI 0 "register_operand" "=v") + [(set (match_operand:V32QI 0 "register_operand" "=Yw") (vec_select:V32QI (vec_concat:V64QI - (match_operand:V32QI 1 "register_operand" "v") - (match_operand:V32QI 2 "nonimmediate_operand" "vm")) + (match_operand:V32QI 1 "register_operand" "Yw") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 8) (const_int 40) (const_int 9) (const_int 41) (const_int 10) (const_int 42) @@ -14007,18 +14001,18 @@ (const_int 29) (const_int 61) (const_int 30) (const_int 62) (const_int 31) (const_int 63)])))] - "TARGET_AVX2 && <mask_avx512vl_condition>" + "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "<mask_prefix>") (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv16qi<mask_name>" - [(set (match_operand:V16QI 0 "register_operand" "=x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (vec_select:V16QI (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "0,v") - (match_operand:V16QI 2 "vector_operand" "xBm,vm")) + (match_operand:V16QI 1 "register_operand" "0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 8) (const_int 24) (const_int 9) (const_int 25) (const_int 10) (const_int 26) @@ -14027,7 +14021,7 @@ (const_int 13) (const_int 29) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_SSE2 && <mask_avx512vl_condition>" + "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>" "@ punpckhbw\t{%2, %0|%0, %2} vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" @@ -14082,11 +14076,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_lowv32qi<mask_name>" - [(set (match_operand:V32QI 0 "register_operand" "=v") + [(set (match_operand:V32QI 0 "register_operand" "=Yw") (vec_select:V32QI (vec_concat:V64QI - (match_operand:V32QI 1 "register_operand" "v") - (match_operand:V32QI 2 "nonimmediate_operand" "vm")) + (match_operand:V32QI 1 "register_operand" "Yw") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 0) (const_int 32) (const_int 1) (const_int 33) (const_int 2) (const_int 34) @@ -14110,11 +14104,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_lowv16qi<mask_name>" - [(set (match_operand:V16QI 0 "register_operand" "=x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (vec_select:V16QI (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "0,v") - (match_operand:V16QI 2 "vector_operand" "xBm,vm")) + (match_operand:V16QI 1 "register_operand" "0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 0) (const_int 16) (const_int 1) (const_int 17) (const_int 2) (const_int 18) @@ -14162,11 +14156,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_highv16hi<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "v") - (match_operand:V16HI 2 "nonimmediate_operand" "vm")) + (match_operand:V16HI 1 "register_operand" "Yw") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 4) (const_int 20) (const_int 5) (const_int 21) (const_int 6) (const_int 22) @@ -14182,11 +14176,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv8hi<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (vec_select:V8HI (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,v") - (match_operand:V8HI 2 "vector_operand" "xBm,vm")) + (match_operand:V8HI 1 "register_operand" "0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) (const_int 6) (const_int 14) @@ -14230,11 +14224,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_lowv16hi<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "v") - (match_operand:V16HI 2 "nonimmediate_operand" "vm")) + (match_operand:V16HI 1 "register_operand" "Yw") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 0) (const_int 16) (const_int 1) (const_int 17) (const_int 2) (const_int 18) @@ -14250,11 +14244,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_lowv8hi<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (vec_select:V8HI (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,v") - (match_operand:V8HI 2 "vector_operand" "xBm,vm")) + (match_operand:V8HI 1 "register_operand" "0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 2) (const_int 10) @@ -15190,9 +15184,9 @@ }) (define_insn "avx2_pshuflw_1<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" "vm") + (match_operand:V16HI 1 "nonimmediate_operand" "Ywm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -15264,9 +15258,9 @@ }) (define_insn "sse2_pshuflw_1<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yw") (vec_select:V8HI - (match_operand:V8HI 1 "vector_operand" "vBm") + (match_operand:V8HI 1 "vector_operand" "YwBm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -15347,9 +15341,9 @@ }) (define_insn "avx2_pshufhw_1<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" "vm") + (match_operand:V16HI 1 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 1) (const_int 2) @@ -15421,9 +15415,9 @@ }) (define_insn "sse2_pshufhw_1<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yw") (vec_select:V8HI - (match_operand:V8HI 1 "vector_operand" "vBm") + (match_operand:V8HI 1 "vector_operand" "YwBm") (parallel [(const_int 0) (const_int 1) (const_int 2) @@ -16213,15 +16207,15 @@ }) (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>" - [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>") (truncate:VI12_AVX2_AVX512BW (lshiftrt:<ssedoublemode> (plus:<ssedoublemode> (plus:<ssedoublemode> (zero_extend:<ssedoublemode> - (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v")) + (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>")) (zero_extend:<ssedoublemode> - (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm"))) + (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m"))) (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand")) (const_int 1))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition> @@ -16238,10 +16232,10 @@ ;; The correct representation for this is absolutely enormous, and ;; surely not generally useful. (define_insn "<sse2_avx2>_psadbw" - [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW") (unspec:VI8_AVX2_AVX512BW - [(match_operand:<ssebytemode> 1 "register_operand" "0,v") - (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")] + [(match_operand:<ssebytemode> 1 "register_operand" "0,YW") + (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")] UNSPEC_PSADBW))] "TARGET_SSE2" "@ @@ -16815,12 +16809,12 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_pmaddubsw256" - [(set (match_operand:V16HI 0 "register_operand" "=x,v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (ss_plus:V16HI (mult:V16HI (zero_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x,v") + (match_operand:V32QI 1 "register_operand" "Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16831,7 +16825,7 @@ (const_int 28) (const_int 30)]))) (sign_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16863,10 +16857,9 @@ (const_int 29) (const_int 31)]))))))] "TARGET_AVX2" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,avx512bw") - (set_attr "type" "sseiadd") + [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex,evex") + (set_attr "prefix" "vex") (set_attr "mode" "OI")]) ;; The correct representation for this is absolutely enormous, and @@ -16919,19 +16912,19 @@ (set_attr "mode" "XI")]) (define_insn "ssse3_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (ss_plus:V8HI (mult:V8HI (zero_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "0,x,v") + (match_operand:V16QI 1 "register_operand" "0,Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16952,14 +16945,13 @@ "TARGET_SSSE3" "@ pmaddubsw\t{%2, %0|%0, %2} - vpmaddubsw\t{%2, %1, %0|%0, %1, %2} vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_pmaddubsw" @@ -17065,16 +17057,16 @@ }) (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>") (truncate:VI2_AVX2 (lshiftrt:<ssedoublemode> (plus:<ssedoublemode> (lshiftrt:<ssedoublemode> (mult:<ssedoublemode> (sign_extend:<ssedoublemode> - (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v")) + (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")) (sign_extend:<ssedoublemode> - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm"))) + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m"))) (const_int 14)) (match_operand:VI2_AVX2 3 "const1_operand")) (const_int 1))))] @@ -17082,13 +17074,12 @@ && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pmulhrsw\t{%2, %0|%0, %2} - vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2} vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "smulhrsv4hi3" @@ -17160,21 +17151,20 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>") (unspec:VI1_AVX512 - [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v") - (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")] + [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>") + (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")] UNSPEC_PSHUFB))] "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ pshufb\t{%2, %0|%0, %2} - vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "btver2_decode" "vector") (set_attr "mode" "<sseinsnmode>")]) @@ -17274,11 +17264,11 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<ssse3_avx2>_palignr<mode>" - [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v") + [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>") (unspec:SSESCALARMODE - [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v") - (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")] + [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>") + (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] UNSPEC_PALIGNR))] "TARGET_SSSE3" { @@ -17289,19 +17279,18 @@ case 0: return "palignr\t{%3, %2, %0|%0, %2, %3}"; case 1: - case 2: return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "ssse3_palignrdi" @@ -17367,9 +17356,9 @@ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_insn "*abs<mode>2" - [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v") + [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>") (abs:VI1248_AVX512VL_AVX512BW - (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))] + (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))] "TARGET_SSSE3" "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -17731,22 +17720,21 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_packusdw<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>") (vec_concat:VI2_AVX2 (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>")) (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packusdw\t{%2, %0|%0, %2} packusdw\t{%2, %0|%0, %2} - vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,noavx,avx,avx512bw") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,orig,<mask_prefix>,evex") + (set_attr "prefix" "orig,orig,<mask_prefix>") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_pblendvb" @@ -17867,9 +17855,9 @@ (set_attr "mode" "TI")]) (define_insn "avx2_<code>v16qiv16hi2<mask_name>" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (any_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] + (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))] "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" [(set_attr "type" "ssemov") @@ -17935,10 +17923,10 @@ "TARGET_AVX512BW") (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>" - [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") (any_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "Yr,*x,v") + (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -17952,7 +17940,7 @@ (set_attr "mode" "TI")]) (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1" - [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") (any_extend:V8HI (match_operand:V8QI 1 "memory_operand" "m,m,m")))] "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c new file mode 100644 index 0000000..8bb3a03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c @@ -0,0 +1,94 @@ +/* PR target/99321 */ +/* { dg-do assemble { target lp64 } } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target assembler_march_noavx512bw } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */ + +#include <x86intrin.h> + +typedef unsigned char V1 __attribute__((vector_size (16))); +typedef unsigned char V2 __attribute__((vector_size (32))); +typedef unsigned short V3 __attribute__((vector_size (16))); +typedef unsigned short V4 __attribute__((vector_size (32))); + +void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_abs_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_abs_epi8 ((__m256i) b); __asm ("" : : "v" (a)); } +void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_abs_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_abs_epi16 ((__m256i) b); __asm ("" : : "v" (a)); } +void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f9 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f10 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f11 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f12 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f13 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f14 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f15 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f16 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f17 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f18 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f19 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f20 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f21 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_alignr_epi8 ((__m128i) a, (__m128i) b, 5); __asm ("" : : "v" (a)); } +void f22 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, 5); __asm ("" : : "v" (a)); } +void f23 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f24 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_avg_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f25 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_avg_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f26 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_avg_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f27 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_avg_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f28 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f29 (void) { register V2 a __asm ("%xmm16"); register V1 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f30 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f31 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +int f32 (void) { register V1 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi8 ((__m128i) a, 3); } +int f33 (void) { register V3 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi16 ((__m128i) a, 3); } +void f34 (int c) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_insert_epi8 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); } +void f35 (int c) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_insert_epi16 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); } +void f36 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_maddubs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f37 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_maddubs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f38 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_madd_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f39 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_madd_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f40 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f41 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f42 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f43 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f44 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhrs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f45 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhrs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f46 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f47 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f48 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f49 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f50 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_sad_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f51 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_sad_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f52 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_shuffle_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f53 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_shuffle_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f54 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflehi_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); } +void f55 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflehi_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); } +void f56 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflelo_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); } +void f57 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflelo_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); } +void f58 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_slli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); } +void f59 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_slli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); } +void f60 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_srli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); } +void f61 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_srli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); } +void f62 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sll_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f63 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sll_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f64 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_slli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f65 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_slli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f66 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srl_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f67 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srl_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f68 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f69 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f70 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sra_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f71 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sra_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f72 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srai_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f73 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srai_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f74 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpackhi_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f75 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpackhi_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f76 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpackhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f77 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpackhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f78 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpacklo_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f79 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpacklo_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f80 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpacklo_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f81 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpacklo_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } |