aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2020-01-28 08:46:23 +0100
committer Jakub Jelinek <jakub@redhat.com> 2020-01-28 08:46:23 +0100
commit bff948aa337807260344c83ac9079d6386410094 (patch)
tree b033aaaf56846dc0de6a6010fc24b1a12989810b /gcc
parent 3c076c9642fd8877def0a0597ec7e4adfb5aa3b3 (diff)
download gcc-bff948aa337807260344c83ac9079d6386410094.zip
gcc-bff948aa337807260344c83ac9079d6386410094.tar.gz
gcc-bff948aa337807260344c83ac9079d6386410094.tar.bz2
i386: Fix ix86_fold_builtin shift folding [PR93418]
The following testcase is miscompiled, because the variable shift left
operand, { -1, -1, -1, -1 } is represented as a VECTOR_CST with
VECTOR_CST_NPATTERNS 1 and VECTOR_CST_NELTS_PER_PATTERN 1, so when
we call builder.new_unary_operation, builder.encoded_nelts () will be just 1
and thus we encode the resulting vector as if all the elements were the
same.

For non-masked is_vshift, we could perhaps call builder.new_binary_operation
(TREE_TYPE (args[0]), args[0], args[1], false), but then there are masked
shifts, for non-is_vshift we could perhaps call it too but with args[2]
instead of args[1], but there is no builder.new_ternary_operation.

All this stuff is primarily for aarch64 anyway, on x86 we don't have any
variable length vectors, and it is not a big deal to compute all elements
and just let builder.finalize () find the most efficient VECTOR_CST
representation of the vector. So, instead of doing too much, this just
keeps using new_unary_operation only if only one VECTOR_CST is involved
(i.e. non-masked shift by constant) and for the rest just compute all elts.

2020-01-28 Jakub Jelinek <jakub@redhat.com>

	PR target/93418
	* config/i386/i386.c (ix86_fold_builtin) <do_shift>: If mask is not
	-1 or is_vshift is true, use new_vector with number of elts npatterns
	rather than new_unary_operation.

	* gcc.target/i386/avx2-pr93418.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/config/i386/i386.c 9
-rw-r--r-- gcc/testsuite/ChangeLog 3
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx2-pr93418.c 20
4 files changed, 35 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6db98ed..3489428 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2020-01-28 Jakub Jelinek <jakub@redhat.com>
+ PR target/93418
+ * config/i386/i386.c (ix86_fold_builtin) <do_shift>: If mask is not
+ -1 or is_vshift is true, use new_vector with number of elts npatterns
+ rather than new_unary_operation.
+
PR tree-optimization/93454
* gimple-fold.c (fold_array_ctor_reference): Perform
elt_size.to_uhwi () just once, instead of calling it in every
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ffe60ba..ffda3e8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17278,8 +17278,13 @@ ix86_fold_builtin (tree fndecl, int n_args,
countt = build_int_cst (integer_type_node, count);
}
tree_vector_builder builder;
- builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
- false);
+ if (mask != HOST_WIDE_INT_M1U || is_vshift)
+ builder.new_vector (TREE_TYPE (args[0]),
+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
+ 1);
+ else
+ builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
+ false);
unsigned int cnt = builder.encoded_nelts ();
for (unsigned int i = 0; i < cnt; ++i)
{
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 44d8e67..6518f7f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
2020-01-28 Jakub Jelinek <jakub@redhat.com>
+ PR target/93418
+ * gcc.target/i386/avx2-pr93418.c: New test.
+
PR tree-optimization/93454
* gcc.dg/pr93454.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr93418.c b/gcc/testsuite/gcc.target/i386/avx2-pr93418.c
new file mode 100644
index 0000000..67ed33d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-pr93418.c
@@ -0,0 +1,20 @@
+/* PR target/93418 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+
+#include <x86intrin.h>
+
+void link_error (void);
+
+void
+foo (void)
+{
+ __m128i a = _mm_set1_epi32 (0xffffffffU);
+ __m128i b = _mm_setr_epi32 (16, 31, -34, 3);
+ __m128i c = _mm_sllv_epi32 (a, b);
+ __v4su d = (__v4su) c;
+ if (d[0] != 0xffff0000U || d[1] != 0x80000000U
+ || d[2] != 0 || d[3] != 0xfffffff8U)
+ link_error ();
+}