aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorKugan Vivekanandarajah <kvivekananda@nvidia.com>2024-10-31 07:23:10 +1100
committerKugan Vivekanandarajah <kvivekananda@nvidia.com>2024-10-31 07:23:16 +1100
commitacba8b3d8dec0124c8b3a7e112b3a784a5091214 (patch)
treec61a8c40f9bca86c66fff80e92ddd3dc6ad4a7d4 /gcc
parent5be5c66071b407a767856b8fa300ede54fcf11b4 (diff)
downloadgcc-acba8b3d8dec0124c8b3a7e112b3a784a5091214.zip
gcc-acba8b3d8dec0124c8b3a7e112b3a784a5091214.tar.gz
gcc-acba8b3d8dec0124c8b3a7e112b3a784a5091214.tar.bz2
[PATCH] Fix SLP when ifcvt versioned loop is not vectorized
When ifcvt versions a loop, it sets dont_vectorize on the scalar loop. If the vector loop is then not vectorized and is removed, the scalar loop is still left with dont_vectorize set. As a result, BB vectorization will not happen for it. This patch clears dont_vectorize on the scalar loop when IFN_LOOP_VECTORIZED is set to false. gcc/ChangeLog: * tree-vectorizer.cc (pass_vectorize::execute): Reset dont_vectorize on the scalar loop when setting IFN_LOOP_VECTORIZED to false. gcc/testsuite/ChangeLog: * gcc.dg/vect/bb-slp-77.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-77.c74
-rw-r--r--gcc/tree-vectorizer.cc2
2 files changed, 76 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-77.c b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
new file mode 100644
index 0000000..b2cc1d1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-77.c
@@ -0,0 +1,74 @@
+
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+#include <stdint.h>
+#include <string.h>
+
+
+typedef struct {
+ uint16_t d;
+ uint16_t m;
+ uint8_t val1[4];
+ uint8_t val2[16];
+} st1;
+
+typedef struct {
+ float d;
+ float s;
+ int8_t val2[32];
+} st2;
+
+float table[1 << 16];
+
+inline static float foo(uint16_t f) {
+ uint16_t s;
+ memcpy(&s, &f, sizeof(uint16_t));
+ return table[s];
+}
+
+
+void test(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) {
+ const int nb = n / 32;
+
+
+ const st1 * restrict x = vx;
+ const st2 * restrict y = vy;
+
+ float sumf = 0.0;
+
+ for (int i = 0; i < nb; i++) {
+ uint32_t val1;
+ memcpy(&val1, x[i].val1, sizeof(val1));
+
+ int sumi0 = 0;
+ int sumi1 = 0;
+
+ if (val1) {
+ for (int j = 0; j < 16; ++j) {
+ const uint8_t xh_0 = ((val1 >> (j)) << 4) & 0x10;
+ const uint8_t xh_1 = ((val1 >> (j + 12)) ) & 0x10;
+
+ const int32_t x0 = (x[i].val2[j] & 0xF) | xh_0;
+ const int32_t x1 = (x[i].val2[j] >> 4) | xh_1;
+
+ sumi0 += (x0 * y[i].val2[j]);
+ sumi1 += (x1 * y[i].val2[j + 16]);
+ }
+ } else {
+ for (int j = 0; j < 16; ++j) {
+ const int32_t x0 = (x[i].val2[j] & 0xF);
+ const int32_t x1 = (x[i].val2[j] >> 4);
+
+ sumi0 += (x0 * y[i].val2[j]);
+ sumi1 += (x1 * y[i].val2[j + 16]);
+ }
+ }
+
+ int sumi = sumi0 + sumi1;
+ sumf += (foo(x[i].d)*y[i].d)*sumi + foo(x[i].m)*y[i].s;
+ }
+
+ *s = sumf;
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp1" { target { { vect_int_mult && vect_element_align } && { ! powerpc*-*-* } } } } } */
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index af112f2..16fa0ec 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -1326,6 +1326,7 @@ pass_vectorize::execute (function *fun)
if (g)
{
fold_loop_internal_call (g, boolean_false_node);
+ loop->dont_vectorize = false;
ret |= TODO_cleanup_cfg;
g = NULL;
}
@@ -1335,6 +1336,7 @@ pass_vectorize::execute (function *fun)
if (g)
{
fold_loop_internal_call (g, boolean_false_node);
+ loop->dont_vectorize = false;
ret |= TODO_cleanup_cfg;
}
}