author		Richard Biener <rguenther@suse.de>	2021-07-07 11:41:03 +0200
committer	Richard Biener <rguenther@suse.de>	2021-07-07 13:48:26 +0200
commit		9f34b780b0461ec7b2b2defe96e44ab616ea2aa3 (patch)
tree		2d8d45ef233f09fc56f46fb6926198c55d5727db /gcc
parent		98bfd845e93937d92ca844d7fa7e853ad51c6193 (diff)
tree-optimization/99728 - improve LIM for loops with aggregate copies
This improves LIM by recording aggregate copies as distinct references
used for disambiguation purposes instead of as UNANALYZABLE_MEM, which
would prevent any invariant motion or store motion across them.  This
allows four of the six references in the loop of the testcase to be
promoted.

2021-07-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/99728
	* tree-ssa-loop-im.c (gather_mem_refs_stmt): Record aggregate
	copies.
	(mem_refs_may_alias_p): Add assert we handled aggregate copies
	elsewhere.
	(sm_seq_valid_bb): Give up when running into aggregate copies.
	(ref_indep_loop_p): Handle aggregate copies as never being
	invariant themselves but allow other refs to be disambiguated
	against them.
	(can_sm_ref_p): Do not try to apply store-motion to aggregate
	copies.

	* g++.dg/opt/pr99728.C: New testcase.
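To illustrate the pattern (a hypothetical reduction, not the committed
testcase): the struct assignment below becomes an aggregate copy in the
GIMPLE IL.  Before this change it was recorded as UNANALYZABLE_MEM, so no
other reference in the loop could be moved or promoted across it; now it
gets its own reference used purely for disambiguation, and the
non-overlapping accumulator store stays a store-motion candidate.

    struct vec2 { double x, y; };
    struct state { vec2 cur, prev; double acc; };

    void
    step (state *__restrict s, const double *__restrict p, unsigned n)
    {
      for (unsigned i = 0; i < n; ++i)
	{
	  s->acc += p[i];    // store-motion candidate; disjoint from the copy
	  s->prev = s->cur;  // aggregate copy: never movable itself, but it
			     // no longer forces s->acc to stay in memory
	  s->cur.x = p[i];
	  s->cur.y = -p[i];
	}
    }

Whether s->acc is actually promoted still depends on the alias oracle
proving it disjoint from the copy, which is what the new stmt-based
queries in ref_indep_loop_p below are responsible for.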
Diffstat (limited to 'gcc')
-rw-r--r--	gcc/testsuite/g++.dg/opt/pr99728.C	50
-rw-r--r--	gcc/tree-ssa-loop-im.c			59
2 files changed, 102 insertions(+), 7 deletions(-)
diff --git a/gcc/testsuite/g++.dg/opt/pr99728.C b/gcc/testsuite/g++.dg/opt/pr99728.C
new file mode 100644
index 0000000..d439323
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr99728.C
@@ -0,0 +1,50 @@
+// PR tree-optimization/99728
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-lim2-details -w -Wno-psabi" }
+
+typedef double __m256d __attribute__((vector_size(sizeof (double) * 4)));
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+ return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+// simple OO wrapper around __m256d
+struct Tvsimple
+ {
+ __m256d v;
+ Tvsimple &operator+=(const Tvsimple &other) {v+=other.v; return *this;}
+ Tvsimple operator*(double val) const { Tvsimple res; res.v = v*_mm256_set1_pd(val); return res;}
+ Tvsimple operator*(Tvsimple val) const { Tvsimple res; res.v = v*val.v; return res; }
+ Tvsimple operator+(Tvsimple val) const { Tvsimple res; res.v = v+val.v; return res; }
+ Tvsimple operator+(double val) const { Tvsimple res; res.v = v+_mm256_set1_pd(val); return res;}
+ };
+
+template<typename vtype> struct s0data_s
+ { vtype sth, corfac, scale, lam1, lam2, csq, p1r, p1i, p2r, p2i; };
+
+template<typename vtype> void foo(s0data_s<vtype> & __restrict__ d,
+ const double * __restrict__ coef, const double * __restrict__ alm,
+ unsigned long l, unsigned long il, unsigned long lmax)
+ {
+// critical loop
+ while (l<=lmax)
+ {
+ d.p1r += d.lam2*alm[2*l];
+ d.p1i += d.lam2*alm[2*l+1];
+ d.p2r += d.lam2*alm[2*l+2];
+ d.p2i += d.lam2*alm[2*l+3];
+ Tvsimple tmp = d.lam2*(d.csq*coef[2*il] + coef[2*il+1]) + d.lam1;
+ d.lam1 = d.lam2;
+ d.lam2 = tmp;
+ ++il; l+=2;
+ }
+ }
+
+// this version has dead stores at the end of the loop
+template void foo<>(s0data_s<Tvsimple> & __restrict__ d,
+ const double * __restrict__ coef, const double * __restrict__ alm,
+ unsigned long l, unsigned long il, unsigned long lmax);
+
+// The aggregate copy in the IL should not prevent all store-motion
+// { dg-final { scan-tree-dump-times "Executing store motion" 4 "lim2" } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 9ac390b..81b4ec2 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -122,7 +122,9 @@ public:
hashval_t hash; /* Its hash value. */
/* The memory access itself and associated caching of alias-oracle
- query meta-data. */
+ query meta-data. We are using mem.ref == error_mark_node for the
+ case the reference is represented by its single access stmt
+ in accesses_in_loop[0]. */
ao_ref mem;
bitmap stored; /* The set of loops in that this memory location
@@ -130,8 +132,7 @@ public:
bitmap loaded; /* The set of loops in that this memory location
is loaded from. */
vec<mem_ref_loc> accesses_in_loop;
- /* The locations of the accesses. Vector
- indexed by the loop number. */
+ /* The locations of the accesses. */
/* The following set is computed on demand. */
bitmap_head dep_loop; /* The set of loops in that the memory
@@ -1465,7 +1466,22 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
return;
mem = simple_mem_ref_in_stmt (stmt, &is_stored);
- if (!mem)
+ if (!mem && is_gimple_assign (stmt))
+ {
+ /* For aggregate copies record distinct references but use them
+ only for disambiguation purposes. */
+ id = memory_accesses.refs_list.length ();
+ ref = mem_ref_alloc (NULL, 0, id);
+ memory_accesses.refs_list.safe_push (ref);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Unhandled memory reference %u: ", id);
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+ }
+ record_mem_ref_loc (ref, stmt, mem);
+ is_stored = gimple_vdef (stmt);
+ }
+ else if (!mem)
{
/* We use the shared mem_ref for all unanalyzable refs. */
id = UNANALYZABLE_MEM_ID;
@@ -1595,7 +1611,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
mark_ref_stored (ref, loop);
}
/* A not simple memory op is also a read when it is a write. */
- if (!is_stored || id == UNANALYZABLE_MEM_ID)
+ if (!is_stored || id == UNANALYZABLE_MEM_ID
+ || ref->mem.ref == error_mark_node)
{
bitmap_set_bit (&memory_accesses.refs_loaded_in_loop[loop->num], ref->id);
mark_ref_loaded (ref, loop);
@@ -1714,6 +1731,9 @@ mem_refs_may_alias_p (im_mem_ref *mem1, im_mem_ref *mem2,
hash_map<tree, name_expansion *> **ttae_cache,
bool tbaa_p)
{
+ gcc_checking_assert (mem1->mem.ref != error_mark_node
+ && mem2->mem.ref != error_mark_node);
+
/* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
object and their offset differ in such a way that the locations cannot
overlap, then they cannot alias. */
@@ -2490,6 +2510,13 @@ sm_seq_valid_bb (class loop *loop, basic_block bb, tree vdef,
gcc_assert (data);
if (data->ref == UNANALYZABLE_MEM_ID)
return -1;
+ /* Stop at memory references which we can't move. */
+ else if (memory_accesses.refs_list[data->ref]->mem.ref == error_mark_node)
+ {
+ /* Mark refs_not_in_seq as unsupported. */
+ bitmap_ior_into (refs_not_supported, refs_not_in_seq);
+ return 1;
+ }
/* One of the stores we want to apply SM to and we've not yet seen. */
else if (bitmap_clear_bit (refs_not_in_seq, data->ref))
{
@@ -2798,7 +2825,8 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
else
refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
- if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
+ if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID)
+ || ref->mem.ref == error_mark_node)
indep_p = false;
else
{
@@ -2825,7 +2853,20 @@ ref_indep_loop_p (class loop *loop, im_mem_ref *ref, dep_kind kind)
EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
{
im_mem_ref *aref = memory_accesses.refs_list[i];
- if (!refs_independent_p (ref, aref, kind != sm_waw))
+ if (aref->mem.ref == error_mark_node)
+ {
+ gimple *stmt = aref->accesses_in_loop[0].stmt;
+ if ((kind == sm_war
+ && ref_maybe_used_by_stmt_p (stmt, &ref->mem,
+ kind != sm_waw))
+ || stmt_may_clobber_ref_p_1 (stmt, &ref->mem,
+ kind != sm_waw))
+ {
+ indep_p = false;
+ break;
+ }
+ }
+ else if (!refs_independent_p (ref, aref, kind != sm_waw))
{
indep_p = false;
break;
@@ -2858,6 +2899,10 @@ can_sm_ref_p (class loop *loop, im_mem_ref *ref)
if (!MEM_ANALYZABLE (ref))
return false;
+ /* Can't hoist/sink aggregate copies. */
+ if (ref->mem.ref == error_mark_node)
+ return false;
+
/* It should be movable. */
if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
|| TREE_THIS_VOLATILE (ref->mem.ref)