aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vectorizer.h
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@arm.com> 2020-10-29 13:38:01 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2020-10-29 13:38:01 +0000
commit: 6e23549157d671f4f2e61756a0d0924cc59718ab (patch)
tree: 5b8e002bbfc48786bb13610aeaabfcb5e53c0db4 /gcc/tree-vectorizer.h
parent: 568de14d2e74cfdd600b8995ff6ac08c98ddef48 (diff)
download: gcc-6e23549157d671f4f2e61756a0d0924cc59718ab.zip
gcc-6e23549157d671f4f2e61756a0d0924cc59718ab.tar.gz
gcc-6e23549157d671f4f2e61756a0d0924cc59718ab.tar.bz2
vect: Fix load costs for SLP permutes
For the following test case (compiled with load/store lanes
disabled locally):

  void
  f (uint32_t *restrict x, uint8_t *restrict y, int n)
  {
    for (int i = 0; i < n; ++i)
      {
        x[i * 2] = x[i * 2] + y[i * 2];
        x[i * 2 + 1] = x[i * 2 + 1] + y[i * 2];
      }
  }

we have a redundant no-op permute on the x[] load node:

   node 0x4472350 (max_nunits=8, refcnt=2)
          stmt 0 _5 = *_4;
          stmt 1 _13 = *_12;
          load permutation { 0 1 }

Then, when costing it, we pick a cost of 1, even though we need 4 copies
of the x[] load to match a single y[] load:

   ==> examining statement: _5 = *_4;
   Vectorizing an unaligned access.
   vect_model_load_cost: unaligned supported by hardware.
   vect_model_load_cost: inside_cost = 1, prologue_cost = 0 .

The problem is that the code only considers the permutation for
the first scalar iteration, rather than for all VF iterations.

This patch tries to fix that by making vect_transform_slp_perm_load
calculate the value instead.

gcc/
	* tree-vectorizer.h (vect_transform_slp_perm_load): Take an
	optional extra parameter.
	* tree-vect-slp.c (vect_transform_slp_perm_load): Calculate
	the number of loads as well as the number of permutes, taking
	the counting loop from...
	* tree-vect-stmts.c (vect_model_load_cost): ...here.  Use the value
	computed by vect_transform_slp_perm_load for ncopies.
Diffstat (limited to 'gcc/tree-vectorizer.h')
-rw-r--r-- gcc/tree-vectorizer.h | 3
1 files changed, 2 insertions, 1 deletions
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 13a02cd..fbf5291 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1952,7 +1952,8 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
extern void vect_free_slp_instance (slp_instance);
extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, vec<tree>,
gimple_stmt_iterator *, poly_uint64,
- bool, unsigned *);
+ bool, unsigned *,
+ unsigned * = nullptr);
extern bool vect_slp_analyze_operations (vec_info *);
extern void vect_schedule_slp (vec_info *, vec<slp_instance>);
extern opt_result vect_analyze_slp (vec_info *, unsigned);