-rw-r--r--  gcc/ipa-icf-gimple.cc                    |  41
-rw-r--r--  gcc/ipa-icf-gimple.h                     |  15
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-1_0.c  |  86
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-1_1.c  |  38
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-2_0.c  |  87
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-2_1.c  |  38
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-3_0.c  | 114
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-3_1.c  |  49
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-4_0.c  | 114
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-4_1.c  |  49
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-5_0.c  | 118
-rw-r--r--  gcc/testsuite/gcc.dg/lto/pr113359-5_1.c  |  50
-rw-r--r--  gcc/tree-sra.cc                          | 252
-rw-r--r--  gcc/tree-sra.h                           |   3
14 files changed, 999 insertions, 55 deletions
diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 17f62be..c25eb24 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "attribs.h"
 #include "gimple-walk.h"
+#include "tree-sra.h"
 #include "tree-ssa-alias-compare.h"
 #include "alloc-pool.h"
 
@@ -64,7 +65,8 @@ func_checker::func_checker (tree source_func_decl, tree target_func_decl,
   : m_source_func_decl (source_func_decl), m_target_func_decl (target_func_decl),
     m_ignored_source_nodes (ignored_source_nodes),
     m_ignored_target_nodes (ignored_target_nodes),
-    m_ignore_labels (ignore_labels), m_tbaa (tbaa)
+    m_ignore_labels (ignore_labels), m_tbaa (tbaa),
+    m_total_scalarization_limit_known_p (false)
 {
   function *source_func = DECL_STRUCT_FUNCTION (source_func_decl);
   function *target_func = DECL_STRUCT_FUNCTION (target_func_decl);
@@ -361,6 +363,36 @@ func_checker::operand_equal_p (const_tree t1, const_tree t2,
   return operand_compare::operand_equal_p (t1, t2, flags);
 }
 
+/* Return true if either T1 or T2 cannot be totally scalarized or if doing
+   so would result in copying the same memory.  Otherwise return false.  */
+
+bool
+func_checker::safe_for_total_scalarization_p (tree t1, tree t2)
+{
+  tree type1 = TREE_TYPE (t1);
+  tree type2 = TREE_TYPE (t2);
+
+  if (!AGGREGATE_TYPE_P (type1)
+      || !AGGREGATE_TYPE_P (type2)
+      || !tree_fits_uhwi_p (TYPE_SIZE (type1))
+      || !tree_fits_uhwi_p (TYPE_SIZE (type2)))
+    return true;
+
+  if (!m_total_scalarization_limit_known_p)
+    {
+      push_cfun (DECL_STRUCT_FUNCTION (m_target_func_decl));
+      m_total_scalarization_limit = sra_get_max_scalarization_size ();
+      pop_cfun ();
+      m_total_scalarization_limit_known_p = true;
+    }
+
+  unsigned HOST_WIDE_INT sz = tree_to_uhwi (TYPE_SIZE (type1));
+  gcc_assert (sz == tree_to_uhwi (TYPE_SIZE (type2)));
+  if (sz > m_total_scalarization_limit)
+    return true;
+  return sra_total_scalarization_would_copy_same_data_p (type1, type2);
+}
+
 /* Function responsible for comparison of various operands T1 and T2
    which are accessed as ACCESS.
    If these components, from functions FUNC1 and FUNC2, are equal, true
    is returned.  */
@@ -382,7 +414,12 @@ func_checker::compare_operand (tree t1, tree t2, operand_access_type access)
                                     lto_streaming_expected_p (), m_tbaa);
 
       if (!flags)
-        return true;
+        {
+          if (!safe_for_total_scalarization_p (t1, t2))
+            return return_false_with_msg
+              ("total scalarization may not be equivalent");
+          return true;
+        }
       if (flags & SEMANTICS)
         return return_false_with_msg
           ("compare_ao_refs failed (semantic difference)");
diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
index 7f41961..38e2ec0 100644
--- a/gcc/ipa-icf-gimple.h
+++ b/gcc/ipa-icf-gimple.h
@@ -125,7 +125,8 @@ public:
   func_checker ():
     m_source_func_decl (NULL_TREE), m_target_func_decl (NULL_TREE),
     m_ignored_source_nodes (NULL), m_ignored_target_nodes (NULL),
-    m_ignore_labels (false), m_tbaa (true)
+    m_ignore_labels (false), m_tbaa (true),
+    m_total_scalarization_limit_known_p (false)
   {
     m_source_ssa_names.create (0);
     m_target_ssa_names.create (0);
@@ -205,6 +206,10 @@ public:
   enum operand_access_type {OP_MEMORY, OP_NORMAL};
   typedef hash_set<tree> operand_access_type_map;
 
+  /* Return true if either T1 or T2 cannot be totally scalarized or if doing
+     so would result in copying the same memory.  Otherwise return false.  */
+  bool safe_for_total_scalarization_p (tree t1, tree t2);
+
   /* Function responsible for comparison of various operands T1 and T2.
      If these components, from functions FUNC1 and FUNC2, are equal, true
      is returned.  */
@@ -279,6 +284,14 @@ private:
   /* Flag if we should compare type based alias analysis info.  */
   bool m_tbaa;
 
+  /* Set to true when total scalarization size has already been determined for
+     the functions.  */
+  bool m_total_scalarization_limit_known_p;
+
+  /* When the above is set to true, the determined total scalarization
+     limit.  */
+  unsigned HOST_WIDE_INT m_total_scalarization_limit;
+
 public:
   /* Return true if two operands are equal.  The flags fields can be used
      to specify OEP flags described above.  */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c
new file mode 100644
index 0000000..0b3ca68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c
@@ -0,0 +1,86 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SB
+{
+  unsigned long bx;
+  unsigned int by;
+  unsigned long bz;
+};
+
+struct ZA
+{
+  int p;
+  struct SA s;
+  short q;
+};
+
+struct ZB
+{
+  int p;
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+  struct SA tmp = p->s;
+  *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+  ga.s.ax = CI;
+  ga.s.ay = CL1;
+  ga.s.az = CL2;
+
+  gb.s.bx = CL1;
+  gb.s.by = CI;
+  gb.s.bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+  init();
+  struct SA a;
+  geta (&a, &ga);
+
+  if (a.ax != CI)
+    __builtin_abort ();
+  if (a.ay != CL1)
+    __builtin_abort ();
+  if (a.az != CL2)
+    __builtin_abort ();
+
+  struct SB b;
+  getb (&b, &gb);
+
+  if (b.bx != CL1)
+    __builtin_abort ();
+  if (b.by != CI)
+    __builtin_abort ();
+  if (b.bz != CL2)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c
new file mode 100644
index 0000000..f51b84b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c
@@ -0,0 +1,38 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SB
+{
+  unsigned long bx;
+  unsigned int by;
+  unsigned long bz;
+};
+
+struct ZA
+{
+  int p;
+  struct SA s;
+  short q;
+};
+
+struct ZB
+{
+  int p;
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+  struct SB tmp = p->s;
+  *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c
new file mode 100644
index 0000000..8b2d5bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c
@@ -0,0 +1,87 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra -fdump-ipa-icf-details }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SB
+{
+  unsigned int bx;
+  unsigned long by;
+  unsigned long bz;
+};
+
+struct ZA
+{
+  int p;
+  struct SA s;
+  short q;
+};
+
+struct ZB
+{
+  int p;
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+  struct SA tmp = p->s;
+  *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+  ga.s.ax = CI;
+  ga.s.ay = CL1;
+  ga.s.az = CL2;
+
+  gb.s.bx = CI;
+  gb.s.by = CL1;
+  gb.s.bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+  init();
+  struct SA a;
+  geta (&a, &ga);
+
+  if (a.ax != CI)
+    __builtin_abort ();
+  if (a.ay != CL1)
+    __builtin_abort ();
+  if (a.az != CL2)
+    __builtin_abort ();
+
+  struct SB b;
+  getb (&b, &gb);
+
+  if (b.bx != CI)
+    __builtin_abort ();
+  if (b.by != CL1)
+    __builtin_abort ();
+  if (b.bz != CL2)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c
new file mode 100644
index 0000000..61bc054
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c
@@ -0,0 +1,38 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SB
+{
+  unsigned int bx;
+  unsigned long by;
+  unsigned long bz;
+};
+
+struct ZA
+{
+  int p;
+  struct SA s;
+  short q;
+};
+
+struct ZB
+{
+  int p;
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+  struct SB tmp = p->s;
+  *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c
new file mode 100644
index 0000000..f74819b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c
@@ -0,0 +1,114 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned long bx;
+  unsigned int by;
+  unsigned long bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+  struct SA tmp = p->s;
+  *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+  ga.s.arr[0].ax = CI;
+  ga.s.arr[0].ay = CL1;
+  ga.s.arr[0].az = CL2;
+  ga.s.arr[1].ax = CI;
+  ga.s.arr[1].ay = CL1;
+  ga.s.arr[1].az = CL2;
+
+  gb.s.arr[0].bx = CL1;
+  gb.s.arr[0].by = CI;
+  gb.s.arr[0].bz = CL2;
+  gb.s.arr[1].bx = CL1;
+  gb.s.arr[1].by = CI;
+  gb.s.arr[1].bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+  init();
+  struct SA a;
+  geta (&a, &ga);
+
+  if (a.arr[0].ax != CI)
+    __builtin_abort ();
+  if (a.arr[0].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[0].az != CL2)
+    __builtin_abort ();
+  if (a.arr[1].ax != CI)
+    __builtin_abort ();
+  if (a.arr[1].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[1].az != CL2)
+    __builtin_abort ();
+
+  struct SB b;
+  getb (&b, &gb);
+
+  if (b.arr[0].bx != CL1)
+    __builtin_abort ();
+  if (b.arr[0].by != CI)
+    __builtin_abort ();
+  if (b.arr[0].bz != CL2)
+    __builtin_abort ();
+  if (b.arr[1].bx != CL1)
+    __builtin_abort ();
+  if (b.arr[1].by != CI)
+    __builtin_abort ();
+  if (b.arr[1].bz != CL2)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c
new file mode 100644
index 0000000..25a6a0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c
@@ -0,0 +1,49 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned long bx;
+  unsigned int by;
+  unsigned long bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+  struct SB tmp = p->s;
+  *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c
new file mode 100644
index 0000000..170c8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c
@@ -0,0 +1,114 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned long bx;
+  unsigned long by;
+  unsigned int bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+  struct SA tmp = p->s;
+  *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+  ga.s.arr[0].ax = CI;
+  ga.s.arr[0].ay = CL1;
+  ga.s.arr[0].az = CL2;
+  ga.s.arr[1].ax = CI;
+  ga.s.arr[1].ay = CL1;
+  ga.s.arr[1].az = CL2;
+
+  gb.s.arr[0].bx = CL1;
+  gb.s.arr[0].by = CL2;
+  gb.s.arr[0].bz = CI;
+  gb.s.arr[1].bx = CL1;
+  gb.s.arr[1].by = CL2;
+  gb.s.arr[1].bz = CI;
+}
+
+int
+main (int argc, char **argv)
+{
+  init();
+  struct SA a;
+  geta (&a, &ga);
+
+  if (a.arr[0].ax != CI)
+    __builtin_abort ();
+  if (a.arr[0].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[0].az != CL2)
+    __builtin_abort ();
+  if (a.arr[1].ax != CI)
+    __builtin_abort ();
+  if (a.arr[1].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[1].az != CL2)
+    __builtin_abort ();
+
+  struct SB b;
+  getb (&b, &gb);
+
+  if (b.arr[0].bx != CL1)
+    __builtin_abort ();
+  if (b.arr[0].by != CL2)
+    __builtin_abort ();
+  if (b.arr[0].bz != CI)
+    __builtin_abort ();
+  if (b.arr[1].bx != CL1)
+    __builtin_abort ();
+  if (b.arr[1].by != CL2)
+    __builtin_abort ();
+  if (b.arr[1].bz != CI)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c
new file mode 100644
index 0000000..7659810
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c
@@ -0,0 +1,49 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned long bx;
+  unsigned long by;
+  unsigned int bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+  struct SB tmp = p->s;
+  *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c
new file mode 100644
index 0000000..5ad457f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c
@@ -0,0 +1,118 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra -fdump-ipa-icf-details }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+  short ee;
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned int bx;
+  unsigned long by;
+  unsigned long bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+  short ee;
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+  struct SA tmp = p->s;
+  *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+  ga.s.arr[0].ax = CI;
+  ga.s.arr[0].ay = CL1;
+  ga.s.arr[0].az = CL2;
+  ga.s.arr[1].ax = CI;
+  ga.s.arr[1].ay = CL1;
+  ga.s.arr[1].az = CL2;
+
+  gb.s.arr[0].bx = CI;
+  gb.s.arr[0].by = CL1;
+  gb.s.arr[0].bz = CL2;
+  gb.s.arr[1].bx = CI;
+  gb.s.arr[1].by = CL1;
+  gb.s.arr[1].bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+  init();
+  struct SA a;
+  geta (&a, &ga);
+
+  if (a.arr[0].ax != CI)
+    __builtin_abort ();
+  if (a.arr[0].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[0].az != CL2)
+    __builtin_abort ();
+  if (a.arr[1].ax != CI)
+    __builtin_abort ();
+  if (a.arr[1].ay != CL1)
+    __builtin_abort ();
+  if (a.arr[1].az != CL2)
+    __builtin_abort ();
+
+  struct SB b;
+  getb (&b, &gb);
+
+  if (b.arr[0].bx != CI)
+    __builtin_abort ();
+  if (b.arr[0].by != CL1)
+    __builtin_abort ();
+  if (b.arr[0].bz != CL2)
+    __builtin_abort ();
+  if (b.arr[1].bx != CI)
+    __builtin_abort ();
+  if (b.arr[1].by != CL1)
+    __builtin_abort ();
+  if (b.arr[1].bz != CL2)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c
new file mode 100644
index 0000000..128b23c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c
@@ -0,0 +1,50 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+  unsigned int ax;
+  unsigned long ay;
+  unsigned long az;
+};
+
+struct SA
+{
+  int p;
+  struct AA arr[2];
+  short ee;
+};
+
+struct ZA
+{
+  struct SA s;
+  short q;
+};
+
+struct AB
+{
+  unsigned int bx;
+  unsigned long by;
+  unsigned long bz;
+};
+
+struct SB
+{
+  int p;
+  struct AB arr[2];
+  short ee;
+};
+
+struct ZB
+{
+  struct SB s;
+  short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+  struct SB tmp = p->s;
+  *d = tmp;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index dbfae5e..32fa289 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -985,18 +985,101 @@ create_access (tree expr, gimple *stmt, bool write)
   return access;
 }
 
+/* Given an array type TYPE, extract element size to *EL_SIZE, minimum index to
+   *IDX and maximum index to *MAX so that the caller can iterate over all
+   elements and return true, except if the array is known to be zero-length,
+   then return false.  */
 
-/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
-   ARRAY_TYPE with fields that are either of gimple register types (excluding
-   bit-fields) or (recursively) scalarizable types.  CONST_DECL must be true if
-   we are considering a decl from constant pool.  If it is false, char arrays
-   will be refused.  */
+static bool
+prepare_iteration_over_array_elts (tree type, HOST_WIDE_INT *el_size,
+                                   offset_int *idx, offset_int *max)
+{
+  tree elem_size = TYPE_SIZE (TREE_TYPE (type));
+  gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
+  *el_size = tree_to_shwi (elem_size);
+  gcc_assert (*el_size > 0);
+
+  tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
+  gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
+  tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
+  /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1.  */
+  if (!maxidx)
+    return false;
+  gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
+  tree domain = TYPE_DOMAIN (type);
+  /* MINIDX and MAXIDX are inclusive, and must be interpreted in
+     DOMAIN (e.g. signed int, whereas min/max may be size_int).  */
+  *idx = wi::to_offset (minidx);
+  *max = wi::to_offset (maxidx);
+  if (!TYPE_UNSIGNED (domain))
+    {
+      *idx = wi::sext (*idx, TYPE_PRECISION (domain));
+      *max = wi::sext (*max, TYPE_PRECISION (domain));
+    }
+  return true;
+}
+
+/* A structure to track collecting padding and hold collected padding
+   information.  */
+
+class sra_padding_collecting
+{
+public:
+  /* Given that there won't be any data until at least OFFSET, add an
+     appropriate entry to the list of paddings or extend the last one.  */
+  void record_padding (HOST_WIDE_INT offset);
+  /* Vector of pairs describing contiguous pieces of padding, each pair
+     consisting of offset and length.  */
+  auto_vec<std::pair<HOST_WIDE_INT, HOST_WIDE_INT>, 10> m_padding;
+  /* Offset where data should continue after the last seen actual bit of data
+     if there was no padding.  */
+  HOST_WIDE_INT m_data_until = 0;
+};
+
+/* Given that there won't be any data until at least OFFSET, add an appropriate
+   entry to the list of paddings or extend the last one.  */
+
+void sra_padding_collecting::record_padding (HOST_WIDE_INT offset)
+{
+  if (offset > m_data_until)
+    {
+      HOST_WIDE_INT psz = offset - m_data_until;
+      if (!m_padding.is_empty ()
+          && ((m_padding[m_padding.length () - 1].first
+               + m_padding[m_padding.length () - 1].second) == offset))
+        m_padding[m_padding.length () - 1].second += psz;
+      else
+        m_padding.safe_push (std::make_pair (m_data_until, psz));
+    }
+}
+
+/* Return true iff TYPE is totally scalarizable - i.e. a RECORD_TYPE or
+   fixed-length ARRAY_TYPE with fields that are either of gimple register types
+   (excluding bit-fields) or (recursively) scalarizable types.  CONST_DECL must
+   be true if we are considering a decl from constant pool.  If it is false,
+   char arrays will be refused.
+
+   TOTAL_OFFSET is the offset of TYPE within any outer type that is being
+   examined.
+
+   If PC is non-NULL, collect padding information into the vector within the
+   structure.  The information is however only complete if the function returns
+   true and does not contain any padding at its end.  */
 
 static bool
-scalarizable_type_p (tree type, bool const_decl)
+totally_scalarizable_type_p (tree type, bool const_decl,
+                             HOST_WIDE_INT total_offset,
+                             sra_padding_collecting *pc)
 {
   if (is_gimple_reg_type (type))
-    return true;
+    {
+      if (pc)
+        {
+          pc->record_padding (total_offset);
+          pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
+        }
+      return true;
+    }
   if (type_contains_placeholder_p (type))
     return false;
@@ -1011,6 +1094,8 @@ scalarizable_type_p (tree type, bool const_decl)
       {
         tree ft = TREE_TYPE (fld);
 
+        if (!DECL_SIZE (fld))
+          return false;
         if (zerop (DECL_SIZE (fld)))
           continue;
 
@@ -1025,7 +1110,8 @@ scalarizable_type_p (tree type, bool const_decl)
        if (DECL_BIT_FIELD (fld))
          return false;
 
-       if (!scalarizable_type_p (ft, const_decl))
+       if (!totally_scalarizable_type_p (ft, const_decl, total_offset + pos,
+                                         pc))
          return false;
       }
 
@@ -1054,9 +1140,35 @@ scalarizable_type_p (tree type, bool const_decl)
          /* Variable-length array, do not allow scalarization.  */
          return false;
 
+       unsigned old_padding_len = 0;
+       if (pc)
+         old_padding_len = pc->m_padding.length ();
        tree elem = TREE_TYPE (type);
-       if (!scalarizable_type_p (elem, const_decl))
+       if (!totally_scalarizable_type_p (elem, const_decl, total_offset, pc))
          return false;
+       if (pc)
+         {
+           unsigned new_padding_len = pc->m_padding.length ();
+           HOST_WIDE_INT el_size;
+           offset_int idx, max;
+           if (!prepare_iteration_over_array_elts (type, &el_size, &idx, &max))
+             return true;
+           pc->record_padding (total_offset + el_size);
+           ++idx;
+           for (HOST_WIDE_INT pos = total_offset + el_size;
+                idx <= max;
+                pos += el_size, ++idx)
+             {
+               for (unsigned i = old_padding_len; i < new_padding_len; i++)
+                 {
+                   HOST_WIDE_INT pp
+                     = pos + pc->m_padding[i].first - total_offset;
+                   HOST_WIDE_INT psz = pc->m_padding[i].second;
+                   pc->m_padding.safe_push (std::make_pair (pp, psz));
+                 }
+             }
+           pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
+         }
        return true;
       }
     default:
@@ -3540,28 +3652,12 @@ totally_scalarize_subtree (struct access *root)
     case ARRAY_TYPE:
       {
        tree elemtype = TREE_TYPE (root->type);
-       tree elem_size = TYPE_SIZE (elemtype);
-       gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
-       HOST_WIDE_INT el_size = tree_to_shwi (elem_size);
-       gcc_assert (el_size > 0);
-
-       tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type));
-       gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
-       tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type));
-       /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1.  */
-       if (!maxidx)
-         goto out;
-       gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
-       tree domain = TYPE_DOMAIN (root->type);
-       /* MINIDX and MAXIDX are inclusive, and must be interpreted in
-          DOMAIN (e.g. signed int, whereas min/max may be size_int).  */
-       offset_int idx = wi::to_offset (minidx);
-       offset_int max = wi::to_offset (maxidx);
-       if (!TYPE_UNSIGNED (domain))
-         {
-           idx = wi::sext (idx, TYPE_PRECISION (domain));
-           max = wi::sext (max, TYPE_PRECISION (domain));
-         }
+       HOST_WIDE_INT el_size;
+       offset_int idx, max;
+       if (!prepare_iteration_over_array_elts (root->type, &el_size,
+                                               &idx, &max))
+         break;
+
        for (HOST_WIDE_INT pos = root->offset;
             idx <= max;
             pos += el_size, ++idx)
@@ -3587,7 +3683,8 @@ totally_scalarize_subtree (struct access *root)
                               ? &last_seen_sibling->next_sibling
                               : &root->first_child);
            tree nref = build4 (ARRAY_REF, elemtype, root->expr,
-                               wide_int_to_tree (domain, idx),
+                               wide_int_to_tree (TYPE_DOMAIN (root->type),
+                                                 idx),
                                NULL_TREE, NULL_TREE);
            struct access *new_child
              = create_total_access_and_reshape (root, pos, el_size, elemtype,
@@ -3605,11 +3702,34 @@ totally_scalarize_subtree (struct access *root)
     default:
       gcc_unreachable ();
     }
-
- out:
   return true;
 }
 
+/* Get the total scalarization size limit in the current function.  */
+
+unsigned HOST_WIDE_INT
+sra_get_max_scalarization_size (void)
+{
+  bool optimize_speed_p = !optimize_function_for_size_p (cfun);
+  /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
+     fall back to a target default.  */
+  unsigned HOST_WIDE_INT max_scalarization_size
+    = get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
+
+  if (optimize_speed_p)
+    {
+      if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
+        max_scalarization_size = param_sra_max_scalarization_size_speed;
+    }
+  else
+    {
+      if (OPTION_SET_P (param_sra_max_scalarization_size_size))
+        max_scalarization_size = param_sra_max_scalarization_size_size;
+    }
+  max_scalarization_size *= BITS_PER_UNIT;
+  return max_scalarization_size;
+}
+
 /* Go through all accesses collected throughout the (intraprocedural) analysis
    stage, exclude overlapping ones, identify representatives and build trees
    out of them, making decisions about scalarization on the way.  Return true
@@ -3637,24 +3757,8 @@ analyze_all_variable_accesses (void)
 
   propagate_all_subaccesses ();
 
-  bool optimize_speed_p = !optimize_function_for_size_p (cfun);
-  /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
-     fall back to a target default.  */
   unsigned HOST_WIDE_INT max_scalarization_size
-    = get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
-
-  if (optimize_speed_p)
-    {
-      if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
-        max_scalarization_size = param_sra_max_scalarization_size_speed;
-    }
-  else
-    {
-      if (OPTION_SET_P (param_sra_max_scalarization_size_size))
-        max_scalarization_size = param_sra_max_scalarization_size_size;
-    }
-  max_scalarization_size *= BITS_PER_UNIT;
-
+    = sra_get_max_scalarization_size ();
   EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
     if (bitmap_bit_p (should_scalarize_away_bitmap, i)
         && !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
@@ -3679,7 +3783,9 @@ analyze_all_variable_accesses (void)
               access;
               access = access->next_grp)
            if (!can_totally_scalarize_forest_p (access)
-               || !scalarizable_type_p (access->type, constant_decl_p (var)))
+               || !totally_scalarizable_type_p (access->type,
+                                                constant_decl_p (var),
+                                                0, nullptr))
              {
                all_types_ok = false;
                break;
@@ -5100,3 +5206,45 @@ make_pass_sra (gcc::context *ctxt)
 {
   return new pass_sra (ctxt);
 }
+
+
+/* If type T cannot be totally scalarized, return false.  Otherwise return true
+   and push to the vector within PC offsets and lengths of all padding in the
+   type as total scalarization would encounter it.  */
+
+static bool
+check_ts_and_push_padding_to_vec (tree type, sra_padding_collecting *pc)
+{
+  if (!totally_scalarizable_type_p (type, true /* optimistic value */,
+                                    0, pc))
+    return false;
+
+  pc->record_padding (tree_to_shwi (TYPE_SIZE (type)));
+  return true;
+}
+
+/* Given two types in an assignment, return true either if any one cannot be
+   totally scalarized or if total scalarization of both would copy exactly the
+   same data (i.e. any padding occupies the same offsets and sizes).  */
+
+bool
+sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2)
+{
+  sra_padding_collecting p1;
+  if (!check_ts_and_push_padding_to_vec (t1, &p1))
+    return true;
+
+  sra_padding_collecting p2;
+  if (!check_ts_and_push_padding_to_vec (t2, &p2))
+    return true;
+
+  unsigned l = p1.m_padding.length ();
+  if (l != p2.m_padding.length ())
+    return false;
+  for (unsigned i = 0; i < l; i++)
+    if (p1.m_padding[i].first != p2.m_padding[i].first
+        || p1.m_padding[i].second != p2.m_padding[i].second)
+      return false;
+
+  return true;
+}
+
diff --git a/gcc/tree-sra.h b/gcc/tree-sra.h
index 3a12870..a4aab21 100644
--- a/gcc/tree-sra.h
+++ b/gcc/tree-sra.h
@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
 bool type_internals_preclude_sra_p (tree type, const char **msg);
+unsigned HOST_WIDE_INT sra_get_max_scalarization_size (void);
+bool sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2);
+
 /* Return true iff TYPE is stdarg va_list type (which early SRA and IPA-SRA
    should leave alone).  */
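
For illustration only (not part of the patch), a small self-contained program sketching the layout situation the new sra_total_scalarization_would_copy_same_data_p check distinguishes. It reuses the record layouts of the pr113359-1 and pr113359-2 testcases and assumes an LP64 ABI, as the lp64-guarded tests do; the struct named SC here stands in for pr113359-2's SB, renamed only so that both layouts can coexist in one file.

/* Illustrative sketch, not part of the patch.  On LP64, SA keeps its 4 bytes
   of padding right after ax (bytes 4..7) while SB keeps them after by
   (bytes 12..15), so field-by-field copies produced by total scalarization
   would not move the same bits; SA and SC share one layout, so they would.  */
#include <stdio.h>
#include <stddef.h>

struct SA { unsigned int ax; unsigned long ay; unsigned long az; };
struct SB { unsigned long bx; unsigned int by; unsigned long bz; };
struct SC { unsigned int cx; unsigned long cy; unsigned long cz; };

int
main (void)
{
  /* pr113359-1 situation: padding sits at different offsets, so the ICF
     merge of geta/getb must be rejected.  */
  printf ("SA: size %zu, ay at offset %zu\n",
          sizeof (struct SA), offsetof (struct SA, ay));
  printf ("SB: size %zu, bz at offset %zu\n",
          sizeof (struct SB), offsetof (struct SB, bz));

  /* pr113359-2 situation: identical layout and padding, so merging stays
     valid and the test still expects the "Semantic equality hit" dump.  */
  printf ("SA/SC layouts match: %d\n",
          (int) (sizeof (struct SA) == sizeof (struct SC)
                 && offsetof (struct SA, ay) == offsetof (struct SC, cy)
                 && offsetof (struct SA, az) == offsetof (struct SC, cz)));
  return 0;
}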