aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2024-08-08 11:36:43 +0200
committerRichard Biener <rguenth@gcc.gnu.org>2024-08-20 13:01:40 +0200
commitb8ea13ebf1211714503fd72f25c04376483bfa53 (patch)
tree45e9c5df211ace9a951f1213b630f4f9e83334c3
parent35f56012806432fd89bbae431950a8dc5f6729a3 (diff)
downloadgcc-b8ea13ebf1211714503fd72f25c04376483bfa53.zip
gcc-b8ea13ebf1211714503fd72f25c04376483bfa53.tar.gz
gcc-b8ea13ebf1211714503fd72f25c04376483bfa53.tar.bz2
tree-optimization/116274 - overzealous SLP vectorization
The following tries to address that the vectorizer fails to have precise knowledge of argument and return calling conventions and views some accesses as loads and stores that are not. This is mainly important when doing basic-block vectorization as otherwise loop indexing would force such arguments to memory. On x86 the reduction in the number of apparent loads and stores often dominates cost analysis so the following tries to mitigate this aggressively by adjusting only the scalar load and store cost, reducing them to the cost of a simple scalar statement, but not touching the vector access cost which would be much harder to estimate. Thereby we err on the side of not performing basic-block vectorization. PR tree-optimization/116274 * tree-vect-slp.cc (vect_bb_slp_scalar_cost): Cost scalar loads and stores as simple scalar stmts when they access a non-global, not address-taken variable that doesn't have BLKmode assigned. * gcc.target/i386/pr116274-2.c: New testcase.
-rw-r--r--gcc/testsuite/gcc.target/i386/pr116274-2.c9
-rw-r--r--gcc/tree-vect-slp.cc12
2 files changed, 20 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr116274-2.c b/gcc/testsuite/gcc.target/i386/pr116274-2.c
new file mode 100644
index 0000000..d581134
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116274-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-slp2-optimized" } */
+
+struct a { long x,y; };
+long test(struct a a) { return a.x+a.y; }
+
+/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */
+/* { dg-final { scan-assembler-times "addl|leaq" 1 } } */
+/* { dg-final { scan-assembler-not "padd" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 43ecd26..d6f34d0 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7618,7 +7618,17 @@ next_lane:
vect_cost_for_stmt kind;
if (STMT_VINFO_DATA_REF (orig_stmt_info))
{
- if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
+ data_reference_p dr = STMT_VINFO_DATA_REF (orig_stmt_info);
+ tree base = get_base_address (DR_REF (dr));
+ /* When the scalar access is to a non-global not address-taken
+ decl that is not BLKmode assume we can access it with a single
+ non-load/store instruction. */
+ if (DECL_P (base)
+ && !is_global_var (base)
+ && !TREE_ADDRESSABLE (base)
+ && DECL_MODE (base) != BLKmode)
+ kind = scalar_stmt;
+ else if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
kind = scalar_load;
else
kind = scalar_store;