aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2021-03-07 10:27:28 +0100
committerJakub Jelinek <jakub@redhat.com>2021-03-07 10:27:28 +0100
commita18ebd6c439227b048a91fbfa66f5983f884c157 (patch)
treef260f09a34587bf159636ca6b0b7fafcbba5f6f1
parent0ad6a2e2f0c667f9916cfcdb81f41f6055f1d0b3 (diff)
downloadgcc-a18ebd6c439227b048a91fbfa66f5983f884c157.zip
gcc-a18ebd6c439227b048a91fbfa66f5983f884c157.tar.gz
gcc-a18ebd6c439227b048a91fbfa66f5983f884c157.tar.bz2
i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
As I wrote in the mail with the previous PR99321 fix, we have various bugs where we emit instructions that need avx512bw and avx512vl ISAs when compiling with -mavx512vl -mno-avx512bw. Without the following patch, the attached testcase fails with: /tmp/ccW4PsfG.s: Assembler messages: /tmp/ccW4PsfG.s:9: Error: unsupported instruction `vpaddb' /tmp/ccW4PsfG.s:20: Error: unsupported instruction `vpaddb' /tmp/ccW4PsfG.s:31: Error: unsupported instruction `vpaddw' /tmp/ccW4PsfG.s:42: Error: unsupported instruction `vpaddw' /tmp/ccW4PsfG.s:53: Error: unsupported instruction `vpsubb' /tmp/ccW4PsfG.s:64: Error: unsupported instruction `vpsubb' /tmp/ccW4PsfG.s:75: Error: unsupported instruction `vpsubw' /tmp/ccW4PsfG.s:86: Error: unsupported instruction `vpsubw' /tmp/ccW4PsfG.s:97: Error: unsupported instruction `vpmullw' /tmp/ccW4PsfG.s:108: Error: unsupported instruction `vpmullw' /tmp/ccW4PsfG.s:133: Error: unsupported instruction `vpminub' /tmp/ccW4PsfG.s:144: Error: unsupported instruction `vpminuw' /tmp/ccW4PsfG.s:155: Error: unsupported instruction `vpminuw' /tmp/ccW4PsfG.s:166: Error: unsupported instruction `vpminsb' /tmp/ccW4PsfG.s:177: Error: unsupported instruction `vpminsb' /tmp/ccW4PsfG.s:202: Error: unsupported instruction `vpminsw' /tmp/ccW4PsfG.s:227: Error: unsupported instruction `vpmaxub' /tmp/ccW4PsfG.s:238: Error: unsupported instruction `vpmaxuw' /tmp/ccW4PsfG.s:249: Error: unsupported instruction `vpmaxuw' /tmp/ccW4PsfG.s:260: Error: unsupported instruction `vpmaxsb' /tmp/ccW4PsfG.s:271: Error: unsupported instruction `vpmaxsb' /tmp/ccW4PsfG.s:296: Error: unsupported instruction `vpmaxsw' We already have Yw constraint which is equivalent to v for -mavx512bw -mavx512vl and to nothing otherwise, per discussions this patch changes it to stand for x otherwise. As it is an undocumented internal constraint, hopefully it won't affect any inline asm in the wild. For the instructions that need both we need to use Yw and v for modes that don't need that. 2021-03-07 Jakub Jelinek <jakub@redhat.com> PR target/99321 * config/i386/constraints.md (Yw): Use SSE_REGS if TARGET_SSE but TARGET_AVX512BW or TARGET_AVX512VL is not set. Adjust description and comment. * config/i386/sse.md (v_Yw): New define_mode_attr. (*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3, *sse4_1_<code><mode>3<mask_name>): Use <v_Yw> instead of v in constraints. * config/i386/mmx.md (mmx_pshufw_1, *vec_dupv4hi): Use Yw instead of xYw in constraints. * lib/target-supports.exp (check_effective_target_assembler_march_noavx512bw): New effective target. * gcc.target/i386/avx512vl-pr99321-1.c: New test.
-rw-r--r--gcc/config/i386/constraints.md6
-rw-r--r--gcc/config/i386/mmx.md8
-rw-r--r--gcc/config/i386/sse.md38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c39
-rw-r--r--gcc/testsuite/lib/target-supports.exp10
5 files changed, 79 insertions, 22 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 0ccefa8..a8db33e 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,7 +110,7 @@
;; v any EVEX encodable SSE register for AVX512VL target,
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
-;; target.
+;; target, otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -148,8 +148,8 @@
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
(define_register_constraint "Yw"
- "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
- "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0f51e61..c6a2882 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2021,9 +2021,9 @@
})
(define_insn "mmx_pshufw_1"
- [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_select:V4HI
- (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
+ (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -2105,10 +2105,10 @@
(set_attr "mode" "DI,TI")])
(define_insn "*vec_dupv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+ [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_duplicate:V4HI
(truncate:HI
- (match_operand:SI 1 "register_operand" "0,xYw"))))]
+ (match_operand:SI 1 "register_operand" "0,Yw"))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
"@
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index db5be59..ca4372d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -560,6 +560,14 @@
(V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
(V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
+(define_mode_attr v_Yw
+ [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
+ (V8HI "Yw") (V16HI "Yw") (V32HI "v")
+ (V4SI "v") (V8SI "v") (V16SI "v")
+ (V2DI "v") (V4DI "v") (V8DI "v")
+ (V4SF "v") (V8SF "v") (V16SF "v")
+ (V2DF "v") (V4DF "v") (V8DF "v")])
+
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
@@ -11677,10 +11685,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<insn><mode>3"
- [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
(plusminus:VI_AVX2
- (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
- (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
+ (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
+ (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11790,9 +11798,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
- (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
@@ -12618,10 +12626,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*avx2_<code><mode>3"
- [(set (match_operand:VI124_256 0 "register_operand" "=v")
+ [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
(maxmin:VI124_256
- (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
- (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
+ (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
+ (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
@@ -12745,10 +12753,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(smaxmin:VI14_128
- (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
- (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+ (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
+ (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -12830,10 +12838,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
- [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(umaxmin:VI24_128
- (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
- (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+ (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
+ (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c
new file mode 100644
index 0000000..9da5eee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c
@@ -0,0 +1,39 @@
+/* PR target/99321 */
+/* { dg-do assemble { target lp64 } } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target assembler_march_noavx512bw } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
+
+#include <x86intrin.h>
+
+typedef unsigned char V1 __attribute__((vector_size (16)));
+typedef unsigned char V2 __attribute__((vector_size (32)));
+typedef unsigned short V3 __attribute__((vector_size (16)));
+typedef unsigned short V4 __attribute__((vector_size (32)));
+
+void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
+void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
+void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index c797db4..570d5d3 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8945,6 +8945,16 @@ proc check_effective_target_avx512bw { } {
} "-mavx512bw" ]
}
+# Return 1 if -Wa,-march=+noavx512bw is supported.
+proc check_effective_target_assembler_march_noavx512bw {} {
+ if { [istarget i?86*-*-*] || [istarget x86_64*-*-*] } {
+ return [check_no_compiler_messages assembler_march_noavx512bw object {
+ void foo (void) {}
+ } "-mno-avx512bw -Wa,-march=+noavx512bw"]
+ }
+ return 0
+}
+
# Return 1 if avx512vp2intersect instructions can be compiled.
proc check_effective_target_avx512vp2intersect { } {
return [check_no_compiler_messages avx512vp2intersect object {