aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2020-02-06 11:08:59 +0100
committerJakub Jelinek <jakub@redhat.com>2020-02-06 11:08:59 +0100
commit3f740c67dbb90177aa71d3c60ef9b0fd2f44dbd9 (patch)
treeda4f56c7d249b3940ba60ff223273b8326db16fe /gcc
parentcb3f06480a17f98579704b9927632627a3814c5c (diff)
downloadgcc-3f740c67dbb90177aa71d3c60ef9b0fd2f44dbd9.zip
gcc-3f740c67dbb90177aa71d3c60ef9b0fd2f44dbd9.tar.gz
gcc-3f740c67dbb90177aa71d3c60ef9b0fd2f44dbd9.tar.bz2
i386: Improve avx* vector concatenation [PR93594]
The following testcase shows that for _mm256_set*_m128i and similar intrinsics, we sometimes generate bad code. All 4 routines are expressing the same thing, a 128-bit vector zero padded to 256-bit vector, but only the 3rd one actually emits the desired insn: vmovdqa %xmm0, %xmm0. The others emit: vpxor %xmm1, %xmm1, %xmm1; vinserti128 $0x1, %xmm1, %ymm0, %ymm0. The problem is that the cast builtins use UNSPEC_CAST, which is simplified after reload using a splitter, but during combine it prevents optimizations. We do have avx_vec_concat* patterns that generate efficient code, both for this low-part + zero concatenation special case and for other cases too, so the following define_insn_and_split just recognizes an avx_vec_concat made of a low half of a cast and some other reg. 2020-02-06 Jakub Jelinek <jakub@redhat.com> PR target/93594 * config/i386/predicates.md (avx_identity_operand): New predicate. * config/i386/sse.md (*avx_vec_concat<mode>_1): New define_insn_and_split. * gcc.target/i386/avx2-pr93594.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog5
-rw-r--r--gcc/config/i386/predicates.md13
-rw-r--r--gcc/config/i386/sse.md18
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/avx2-pr93594.c32
5 files changed, 73 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b5b465a..382e313 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2020-02-06 Jakub Jelinek <jakub@redhat.com>
+ PR target/93594
+ * config/i386/predicates.md (avx_identity_operand): New predicate.
+ * config/i386/sse.md (*avx_vec_concat<mode>_1): New
+ define_insn_and_split.
+
PR libgomp/93515
* omp-low.c (use_pointer_for_field): For nested constructs, also
look for map clauses on target construct.
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1119366..3ab9da4 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1584,6 +1584,19 @@
return true;
})
+;; Return true if OP is a parallel for identity permute.
+(define_predicate "avx_identity_operand"
+ (and (match_code "parallel")
+ (match_code "const_int" "a"))
+{
+ int i, nelt = XVECLEN (op, 0);
+
+ for (i = 0; i < nelt; ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != i)
+ return false;
+ return true;
+})
+
;; Return true if OP is a proper third operand to vpblendw256.
(define_predicate "avx2_pblendw_operand"
(match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac4cf5b..cfd79a8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21358,6 +21358,24 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn_and_split "*avx_vec_concat<mode>_1"
+ [(set (match_operand:V_256_512 0 "register_operand")
+ (vec_concat:V_256_512
+ (vec_select:<ssehalfvecmode>
+ (unspec:V_256_512
+ [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand")]
+ UNSPEC_CAST)
+ (match_parallel 3 "avx_identity_operand"
+ [(match_operand 4 "const_int_operand")]))
+ (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand")))]
+ "TARGET_AVX
+ && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
+ || !MEM_P (operands[1]))
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (vec_concat:V_256_512 (match_dup 1) (match_dup 2)))])
+
(define_insn "vcvtph2ps<mask_name>"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5802f0d..7b0b9c2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-06 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/93594
+ * gcc.target/i386/avx2-pr93594.c: New test.
+
2020-02-05 Martin Sebor <msebor@redhat.com>
PR tree-optimization/92765
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr93594.c b/gcc/testsuite/gcc.target/i386/avx2-pr93594.c
new file mode 100644
index 0000000..963c8de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr93594.c
@@ -0,0 +1,32 @@
+/* PR target/93594 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -masm=att" } */
+/* { dg-final { scan-assembler-times "vmovdqa\t%xmm0, %xmm0" 4 } } */
+/* { dg-final { scan-assembler-not "vpxor\t%" } } */
+/* { dg-final { scan-assembler-not "vinserti128\t\\\$" } } */
+
+#include <x86intrin.h>
+
+__m256i
+foo (__m128i x)
+{
+ return _mm256_setr_m128i (x, _mm_setzero_si128 ());
+}
+
+__m256i
+bar (__m128i x)
+{
+ return _mm256_set_m128i (_mm_setzero_si128 (), x);
+}
+
+__m256i
+baz (__m128i x)
+{
+ return _mm256_insertf128_si256 (_mm256_setzero_si256 (), x, 0);
+}
+
+__m256i
+qux (__m128i x)
+{
+ return _mm256_insertf128_si256 (_mm256_castsi128_si256 (x), _mm_setzero_si128 (), 1);
+}