aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2025-01-20 19:52:31 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2025-01-20 19:52:31 +0000
commit: 8edf8b552313951cb4f2f97821ee4b3820c9506b (patch)
tree: bc6a0cd0ab3938cee3be8c866f677e56f610c74a
parent: 6612b8e55471fabd2071a9637a06d3ffce2b05a6 (diff)
download: gcc-8edf8b552313951cb4f2f97821ee4b3820c9506b.zip
          gcc-8edf8b552313951cb4f2f97821ee4b3820c9506b.tar.gz
          gcc-8edf8b552313951cb4f2f97821ee4b3820c9506b.tar.bz2
vect: Preserve OMP info for conditional stores [PR118384]
OMP reductions are lowered into the form:

    idx = .OMP_SIMD_LANE (simuid, 0);
    ...
    oldval = D.anon[idx];
    newval = oldval op ...;
    D.anon[idx] = newval;

So if the scalar loop has a {0, +, 1} iv i, idx = i % vf.
Despite this wraparound, the vectoriser pretends that the D.anon
accesses are linear.  It records the .OMP_SIMD_LANE's second argument
(val) in the data_reference aux field (-1 - val) and then copies this
to the stmt_vec_info simd_lane_access_p field (val + 1).

vectorizable_load and vectorizable_store use simd_lane_access_p
to detect accesses of this form and suppress the vector pointer
increments that would be used for genuine linear accesses.

The difference in this PR is that the reduction is conditional,
and so the store back to D.anon is recognised as a conditional
store pattern.  simd_lane_access_p was not being copied across
from the original stmt_vec_info to the pattern stmt_vec_info,
meaning that it was vectorised as a normal linear store.

gcc/
	PR tree-optimization/118384
	* tree-vectorizer.cc (vec_info::move_dr): Copy
	STMT_VINFO_SIMD_LANE_ACCESS_P.

gcc/testsuite/
	PR tree-optimization/118384
	* gcc.target/aarch64/pr118384_1.c: New test.
	* gcc.target/aarch64/pr118384_2.c: Likewise.
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr118384_1.c  31
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr118384_2.c  4
-rw-r--r--  gcc/tree-vectorizer.cc  2
3 files changed, 37 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.target/aarch64/pr118384_1.c b/gcc/testsuite/gcc.target/aarch64/pr118384_1.c
new file mode 100644
index 0000000..75f6dad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr118384_1.c
@@ -0,0 +1,31 @@
+/* { dg-do run { target aarch64_sve128_hw } } */
+/* { dg-options "-O2 -fopenmp-simd -fno-trapping-math -msve-vector-bits=128 --param aarch64-autovec-preference=sve-only -fstack-protector-strong" } */
+
+#pragma GCC target "+sve"
+
+[[gnu::noipa]] float f(float *ptr, long n)
+{
+ float res = 0.0f;
+#pragma omp simd reduction(+:res)
+ for (long i = 0; i < n; ++i)
+ if (ptr[i] >= 1.0f)
+ res += ptr[i];
+ return res;
+}
+
+[[gnu::noipa]] float g(float *ptr, long n)
+{
+ return f(ptr, n) + 1;
+}
+
+int
+main ()
+{
+#define N 64 * 1024
+ float data[N];
+ for (long i = 0; i < N; ++i)
+ data[i] = 1;
+ if (g(data, N) != N + 1)
+ __builtin_abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr118384_2.c b/gcc/testsuite/gcc.target/aarch64/pr118384_2.c
new file mode 100644
index 0000000..f45a222
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr118384_2.c
@@ -0,0 +1,4 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-O2 -fopenmp-simd -fno-trapping-math -msve-vector-bits=256 --param aarch64-autovec-preference=sve-only -fstack-protector-strong" } */
+
+#include "pr118384_1.c"
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index 01c6470..f38c8d2 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -633,6 +633,8 @@ vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info)
= STMT_VINFO_GATHER_SCATTER_P (old_stmt_info);
STMT_VINFO_STRIDED_P (new_stmt_info)
= STMT_VINFO_STRIDED_P (old_stmt_info);
+ STMT_VINFO_SIMD_LANE_ACCESS_P (new_stmt_info)
+ = STMT_VINFO_SIMD_LANE_ACCESS_P (old_stmt_info);
}
/* Permanently remove the statement described by STMT_INFO from the