aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2021-12-06 15:13:49 +0100
committer Richard Biener <rguenther@suse.de> 2021-12-06 16:17:05 +0100
commit 0dc77a0c4942d3b264f8f8cfc2c509ecc02c3634 (patch)
tree 180fb22336c100bf0c4cbd867458eb68a461a26d
parent 11013814fc83b62a8a367d550b2b6ea0ab9ef8c6 (diff)
downloadgcc-0dc77a0c4942d3b264f8f8cfc2c509ecc02c3634.zip
gcc-0dc77a0c4942d3b264f8f8cfc2c509ecc02c3634.tar.gz
gcc-0dc77a0c4942d3b264f8f8cfc2c509ecc02c3634.tar.bz2
tree-optimization/103581 - fix masked gather on x86
The recent fix to PR103527 exposed an issue with how the various special cases for AVX512 masks in vect_build_gather_load_calls are handled. The following makes that more obvious, fixing the miscompile of 403.gcc. 2021-12-06 Richard Biener <rguenther@suse.de> PR tree-optimization/103581 * tree-vect-stmts.c (vect_build_gather_load_calls): Properly guard all the AVX512 mask cases. * gcc.dg/vect/pr103581.c: New testcase.
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr103581.c | 59
-rw-r--r-- gcc/tree-vect-stmts.c | 4
2 files changed, 61 insertions, 2 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr103581.c b/gcc/testsuite/gcc.dg/vect/pr103581.c
new file mode 100644
index 0000000..d072748
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr103581.c
@@ -0,0 +1,59 @@
+/* { dg-additional-options "-mavx2 -mtune-ctrl=use_gather" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+#define MASKGATHER(SUFF, TYPE1, TYPE2) /* Emit maskgather<SUFF>: out[i] = data[indices[i]] where indices[i] > 1.  */ \
+TYPE1 * __attribute__((noipa)) /* NOTE(review): noipa presumably keeps the call opaque to main - confirm.  */ \
+maskgather ## SUFF (int n, TYPE2 *indices, TYPE1 *data) \
+{ \
+ TYPE1 *out = __builtin_malloc (sizeof (TYPE1) * n); /* Result buffer of n elements; returned to the caller, not freed here.  */ \
+ for (int i = 0; i < n; ++i) \
+ { \
+ TYPE2 d = indices[i]; \
+ if (d > 1) /* Conditional load: for d <= 1 out[i] is left unwritten.  */ \
+ out[i] = data[d]; \
+ } \
+ return out; \
+}
+
+MASKGATHER(udiusi, unsigned long long, unsigned int)
+MASKGATHER(usiusi, unsigned int, unsigned int)
+MASKGATHER(udiudi, unsigned long long, unsigned long long)
+MASKGATHER(usiudi, unsigned int, unsigned long long)
+
+int
+main()
+{ /* Run the four maskgather variants on identity tables; abort on any wrong element.  */
+ check_vect (); /* NOTE(review): comes from tree-vect.h, not shown here; presumably a runtime vector-support check - confirm.  */
+
+ unsigned int idx4[32], data4[32];
+ unsigned long long idx8[32], data8[32];
+ for (int i = 0; i < 32; ++i) /* Identity fill: every index and data entry equals its position.  */
+ {
+ idx4[i] = i;
+ idx8[i] = i;
+ data4[i] = i;
+ data8[i] = i;
+ }
+ unsigned long long *resudiusi = maskgatherudiusi (16, idx4, data8); /* Each call processes only the first 16 of the 32 entries.  */
+ unsigned int *resusiusi = maskgatherusiusi (16, idx4, data4);
+ unsigned long long *resudiudi = maskgatherudiudi (16, idx8, data8);
+ unsigned int *resusiudi = maskgatherusiudi (16, idx8, data4);
+ for (int i = 0; i < 16; ++i)
+ {
+ unsigned int d = idx4[i];
+ if (d > 1) /* Same condition as the callee: elements with d <= 1 were never written, so they are not read.  */
+ {
+ if (resudiusi[i] != data4[d]) /* data4 and data8 hold equal values, so data4 is the reference for all four results.  */
+ __builtin_abort ();
+ if (resudiudi[i] != data4[d])
+ __builtin_abort ();
+ if (resusiudi[i] != data4[d])
+ __builtin_abort ();
+ if (resusiusi[i] != data4[d])
+ __builtin_abort ();
+ }
+ }
+ return 0;
+}
+
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 84c6d97..8c42717 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2785,7 +2785,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
ncopies *= 2;
- if (mask && masktype == real_masktype)
+ if (mask && VECTOR_TYPE_P (real_masktype))
{
for (int i = 0; i < count; ++i)
sel[i] = i | (count / 2);
@@ -2882,7 +2882,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
mask_op = var;
}
}
- if (modifier == NARROW && masktype != real_masktype)
+ if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
{
var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
gassign *new_stmt