aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2020-02-12 11:58:35 +0100
committerJakub Jelinek <jakub@redhat.com>2020-02-12 11:58:35 +0100
commit62fc0a6ce28c502fc6a7b7c09157840bf98f945f (patch)
tree580a9ecec7cd5a66b850474ddd70410836300e12
parent12c763c68a28d0c002b382b15ec138a8bc01e3bf (diff)
downloadgcc-62fc0a6ce28c502fc6a7b7c09157840bf98f945f.zip
gcc-62fc0a6ce28c502fc6a7b7c09157840bf98f945f.tar.gz
gcc-62fc0a6ce28c502fc6a7b7c09157840bf98f945f.tar.bz2
i386: Fix up vec_extract_lo* patterns [PR93670]
The VEXTRACT* insns have way too many different CPUID feature flags (ATT syntax) vextractf128 $imm, %ymm, %xmm/mem AVX vextracti128 $imm, %ymm, %xmm/mem AVX2 vextract{f,i}32x4 $imm, %ymm, %xmm/mem {k}{z} AVX512VL+AVX512F vextract{f,i}32x4 $imm, %zmm, %xmm/mem {k}{z} AVX512F vextract{f,i}64x2 $imm, %ymm, %xmm/mem {k}{z} AVX512VL+AVX512DQ vextract{f,i}64x2 $imm, %zmm, %xmm/mem {k}{z} AVX512DQ vextract{f,i}32x8 $imm, %zmm, %ymm/mem {k}{z} AVX512DQ vextract{f,i}64x4 $imm, %zmm, %ymm/mem {k}{z} AVX512F As the testcase shows and the patch too, we didn't get it right in all cases. The first hunk is about avx512vl_vextractf128v8s[if] incorrectly requiring TARGET_AVX512DQ. The corresponding insn is the first vextract{f,i}32x4 above, so it requires VL+F, and the builtins have it correct (TARGET_AVX512VL implies TARGET_AVX512F): BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI) We only need TARGET_AVX512DQ for avx512vl_vextractf128v4d[if]. The second hunk is about vec_extract_lo_v16s[if]{,_mask}. These are using the vextract{f,i}32x8 insns (AVX512DQ above), but we weren't requiring that, but instead incorrectly && 1 for non-masked and && (64 == 64 && TARGET_AVX512VL) for masked insns. This is extraction from ZMM, so it doesn't need VL for anything. The hunk actually only requires TARGET_AVX512DQ when the insn is masked, if it is not masked, when TARGET_AVX512DQ isn't available we can use vextract{f,i}64x4 instead which is available already in TARGET_AVX512F and does the same thing, extracts the low 256 bits from 512 bits vector (often we split it into just nothing, but there are some special cases like when using xmm16+ when we can't without AVX512VL). The last hunk is about vec_extract_lo_v8s[if]{,_mask}. The non-_mask suffixed ones are ok already and just split into nothing (lowpart subreg). The masked ones were incorrectly requiring TARGET_AVX512VL and TARGET_AVX512DQ, when we only need TARGET_AVX512VL. 2020-02-12 Jakub Jelinek <jakub@redhat.com> PR target/93670 * config/i386/sse.md (VI48F_256_DQ): New mode iterator. (avx512vl_vextractf128<mode>): Use it instead of VI48F_256. Remove TARGET_AVX512DQ from condition. (vec_extract_lo_<mode><mask_name>): Use <mask_avx512dq_condition> instead of <mask_mode512bit_condition> in condition. If TARGET_AVX512DQ is false, emit vextract*64x4 instead of vextract*32x8. (vec_extract_lo_<mode><mask_name>): Drop <mask_avx512dq_condition> from condition. * gcc.target/i386/avx512vl-pr93670.c: New test.
-rw-r--r--gcc/ChangeLog13
-rw-r--r--gcc/config/i386/sse.md18
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-pr93670.c77
4 files changed, 108 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e0d6c7f..6fa4768 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2020-02-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/93670
+ * config/i386/sse.md (VI48F_256_DQ): New mode iterator.
+ (avx512vl_vextractf128<mode>): Use it instead of VI48F_256. Remove
+ TARGET_AVX512DQ from condition.
+ (vec_extract_lo_<mode><mask_name>): Use <mask_avx512dq_condition>
+ instead of <mask_mode512bit_condition> in condition. If
+ TARGET_AVX512DQ is false, emit vextract*64x4 instead of
+ vextract*32x8.
+ (vec_extract_lo_<mode><mask_name>): Drop <mask_avx512dq_condition>
+ from condition.
+
2020-02-12 Kewen Lin <linkw@gcc.gnu.org>
PR target/91052
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 902ea31..ee4c914 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8719,13 +8719,16 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_mode_iterator VI48F_256_DQ
+ [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
+
(define_expand "avx512vl_vextractf128<mode>"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
- (match_operand:VI48F_256 1 "register_operand")
+ (match_operand:VI48F_256_DQ 1 "register_operand")
(match_operand:SI 2 "const_0_to_1_operand")
(match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
(match_operand:QI 4 "register_operand")]
- "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "TARGET_AVX512VL"
{
rtx (*insn)(rtx, rtx, rtx, rtx);
rtx dest = operands[0];
@@ -8793,14 +8796,19 @@
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX512F
- && <mask_mode512bit_condition>
+ && <mask_avx512dq_condition>
&& (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
if (<mask_applied>
|| (!TARGET_AVX512VL
&& !REG_P (operands[0])
&& EXT_REX_SSE_REG_P (operands[1])))
- return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ {
+ if (TARGET_AVX512DQ)
+ return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
+ }
else
return "#";
}
@@ -8910,7 +8918,7 @@
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX
- && <mask_avx512vl_condition> && <mask_avx512dq_condition>
+ && <mask_avx512vl_condition>
&& (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
{
if (<mask_applied>)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7ac9328..433b0af 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/93670
+ * gcc.target/i386/avx512vl-pr93670.c: New test.
+
2020-02-12 Richard Biener <rguenther@suse.de>
PR testsuite/93697
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr93670.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr93670.c
new file mode 100644
index 0000000..3f232a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr93670.c
@@ -0,0 +1,77 @@
+/* PR target/93670 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512dq" } */
+
+#include <x86intrin.h>
+
+__m128i
+f1 (__m256i x)
+{
+ return _mm256_extracti32x4_epi32 (x, 0);
+}
+
+__m128i
+f2 (__m256i x, __m128i w, __mmask8 m)
+{
+ return _mm256_mask_extracti32x4_epi32 (w, m, x, 0);
+}
+
+__m128i
+f3 (__m256i x, __mmask8 m)
+{
+ return _mm256_maskz_extracti32x4_epi32 (m, x, 0);
+}
+
+__m128
+f4 (__m256 x)
+{
+ return _mm256_extractf32x4_ps (x, 0);
+}
+
+__m128
+f5 (__m256 x, __m128 w, __mmask8 m)
+{
+ return _mm256_mask_extractf32x4_ps (w, m, x, 0);
+}
+
+__m128
+f6 (__m256 x, __mmask8 m)
+{
+ return _mm256_maskz_extractf32x4_ps (m, x, 0);
+}
+
+__m128i
+f7 (__m256i x)
+{
+ return _mm256_extracti32x4_epi32 (x, 1);
+}
+
+__m128i
+f8 (__m256i x, __m128i w, __mmask8 m)
+{
+ return _mm256_mask_extracti32x4_epi32 (w, m, x, 1);
+}
+
+__m128i
+f9 (__m256i x, __mmask8 m)
+{
+ return _mm256_maskz_extracti32x4_epi32 (m, x, 1);
+}
+
+__m128
+f10 (__m256 x)
+{
+ return _mm256_extractf32x4_ps (x, 1);
+}
+
+__m128
+f11 (__m256 x, __m128 w, __mmask8 m)
+{
+ return _mm256_mask_extractf32x4_ps (w, m, x, 1);
+}
+
+__m128
+f12 (__m256 x, __mmask8 m)
+{
+ return _mm256_maskz_extractf32x4_ps (m, x, 1);
+}