aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martin Jambor <mjambor@suse.cz> 2024-04-08 18:53:23 +0200
committer Martin Jambor <mjambor@suse.cz> 2024-04-08 18:54:21 +0200
commit 1e3312a25a7b34d6e3f549273e1674c7114e4408 (patch)
tree 6cc160ba4f8c660abdba701df11936f8f347e4bb
parent 1162861439fd3c4b30fc3ccd49462e47e876f04a (diff)
download gcc-1e3312a25a7b34d6e3f549273e1674c7114e4408.zip
gcc-1e3312a25a7b34d6e3f549273e1674c7114e4408.tar.gz
gcc-1e3312a25a7b34d6e3f549273e1674c7114e4408.tar.bz2
ICF&SRA: Make ICF and SRA agree on padding
PR 113359 shows that (at least with -fno-strict-aliasing) ICF can unify two functions which copy an aggregate type of the same size but then SRA, through its total scalarization, can copy the aggregate by pieces, skipping padding, but the padding was not the same in the two original functions that ICF unified. This patch enhances SRA with the ability to collect padding information which then can be compared from within ICF. Unfortunately SRA uses OPTION_SET_P when determining its limits, so ICF needs to switch cfuns at least once to figure it out too. gcc/ChangeLog: 2024-03-27 Martin Jambor <mjambor@suse.cz> PR ipa/113359 * ipa-icf-gimple.h (func_checker): New members safe_for_total_scalarization_p, m_total_scalarization_limit_known_p and m_total_scalarization_limit. (func_checker::func_checker): Initialize new member variables. * ipa-icf-gimple.cc: Include tree-sra.h. (func_checker::func_checker): Initialize new member variables. (func_checker::safe_for_total_scalarization_p): New function. (func_checker::compare_operand): Use the new function. * tree-sra.h (sra_get_max_scalarization_size): Declare. (sra_total_scalarization_would_copy_same_data_p): Likewise. * tree-sra.cc (prepare_iteration_over_array_elts): New function. (class sra_padding_collecting): New. (sra_padding_collecting::record_padding): Likewise. (scalarizable_type_p): Rename to totally_scalarizable_type_p. Add ability to record padding when requested. (totally_scalarize_subtree): Split out gathering information necessary to iterate over array elements to prepare_iteration_over_array_elts. Fix erroneous early exit. (analyze_all_variable_accesses): Adjust the call to totally_scalarizable_type_p. Move determining of total scalarization size limit... (sra_get_max_scalarization_size): ...here. (check_ts_and_push_padding_to_vec): New function. (sra_total_scalarization_would_copy_same_data_p): Likewise. 
gcc/testsuite/ChangeLog: 2024-03-27 Martin Jambor <mjambor@suse.cz> PR ipa/113359 * gcc.dg/lto/pr113359-1_0.c: New. * gcc.dg/lto/pr113359-1_1.c: Likewise. * gcc.dg/lto/pr113359-2_0.c: Likewise. * gcc.dg/lto/pr113359-2_1.c: Likewise. * gcc.dg/lto/pr113359-3_0.c: Likewise. * gcc.dg/lto/pr113359-3_1.c: Likewise. * gcc.dg/lto/pr113359-4_0.c: Likewise. * gcc.dg/lto/pr113359-4_1.c: Likewise. * gcc.dg/lto/pr113359-5_0.c: Likewise. * gcc.dg/lto/pr113359-5_1.c: Likewise.
-rw-r--r-- gcc/ipa-icf-gimple.cc 41
-rw-r--r-- gcc/ipa-icf-gimple.h 15
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-1_0.c 86
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-1_1.c 38
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-2_0.c 87
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-2_1.c 38
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-3_0.c 114
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-3_1.c 49
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-4_0.c 114
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-4_1.c 49
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-5_0.c 118
-rw-r--r-- gcc/testsuite/gcc.dg/lto/pr113359-5_1.c 50
-rw-r--r-- gcc/tree-sra.cc 252
-rw-r--r-- gcc/tree-sra.h 3
14 files changed, 999 insertions, 55 deletions
diff --git a/gcc/ipa-icf-gimple.cc b/gcc/ipa-icf-gimple.cc
index 17f62be..c25eb24 100644
--- a/gcc/ipa-icf-gimple.cc
+++ b/gcc/ipa-icf-gimple.cc
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfgloop.h"
#include "attribs.h"
#include "gimple-walk.h"
+#include "tree-sra.h"
#include "tree-ssa-alias-compare.h"
#include "alloc-pool.h"
@@ -64,7 +65,8 @@ func_checker::func_checker (tree source_func_decl, tree target_func_decl,
: m_source_func_decl (source_func_decl), m_target_func_decl (target_func_decl),
m_ignored_source_nodes (ignored_source_nodes),
m_ignored_target_nodes (ignored_target_nodes),
- m_ignore_labels (ignore_labels), m_tbaa (tbaa)
+ m_ignore_labels (ignore_labels), m_tbaa (tbaa),
+ m_total_scalarization_limit_known_p (false)
{
function *source_func = DECL_STRUCT_FUNCTION (source_func_decl);
function *target_func = DECL_STRUCT_FUNCTION (target_func_decl);
@@ -361,6 +363,36 @@ func_checker::operand_equal_p (const_tree t1, const_tree t2,
return operand_compare::operand_equal_p (t1, t2, flags);
}
+/* Return true if either T1 or T2 cannot be totally scalarized or if doing
+ so would result in copying the same memory. Otherwise return false. */
+
+bool
+func_checker::safe_for_total_scalarization_p (tree t1, tree t2)
+{
+ tree type1 = TREE_TYPE (t1);
+ tree type2 = TREE_TYPE (t2);
+
+ if (!AGGREGATE_TYPE_P (type1)
+ || !AGGREGATE_TYPE_P (type2)
+ || !tree_fits_uhwi_p (TYPE_SIZE (type1))
+ || !tree_fits_uhwi_p (TYPE_SIZE (type2)))
+ return true;
+
+ if (!m_total_scalarization_limit_known_p)
+ {
+ push_cfun (DECL_STRUCT_FUNCTION (m_target_func_decl));
+ m_total_scalarization_limit = sra_get_max_scalarization_size ();
+ pop_cfun ();
+ m_total_scalarization_limit_known_p = true;
+ }
+
+ unsigned HOST_WIDE_INT sz = tree_to_uhwi (TYPE_SIZE (type1));
+ gcc_assert (sz == tree_to_uhwi (TYPE_SIZE (type2)));
+ if (sz > m_total_scalarization_limit)
+ return true;
+ return sra_total_scalarization_would_copy_same_data_p (type1, type2);
+}
+
/* Function responsible for comparison of various operands T1 and T2
which are accessed as ACCESS.
If these components, from functions FUNC1 and FUNC2, are equal, true
@@ -382,7 +414,12 @@ func_checker::compare_operand (tree t1, tree t2, operand_access_type access)
lto_streaming_expected_p (), m_tbaa);
if (!flags)
- return true;
+ {
+ if (!safe_for_total_scalarization_p (t1, t2))
+ return return_false_with_msg
+ ("total scalarization may not be equivalent");
+ return true;
+ }
if (flags & SEMANTICS)
return return_false_with_msg
("compare_ao_refs failed (semantic difference)");
diff --git a/gcc/ipa-icf-gimple.h b/gcc/ipa-icf-gimple.h
index 7f41961..38e2ec0 100644
--- a/gcc/ipa-icf-gimple.h
+++ b/gcc/ipa-icf-gimple.h
@@ -125,7 +125,8 @@ public:
func_checker ():
m_source_func_decl (NULL_TREE), m_target_func_decl (NULL_TREE),
m_ignored_source_nodes (NULL), m_ignored_target_nodes (NULL),
- m_ignore_labels (false), m_tbaa (true)
+ m_ignore_labels (false), m_tbaa (true),
+ m_total_scalarization_limit_known_p (false)
{
m_source_ssa_names.create (0);
m_target_ssa_names.create (0);
@@ -205,6 +206,10 @@ public:
enum operand_access_type {OP_MEMORY, OP_NORMAL};
typedef hash_set<tree> operand_access_type_map;
+ /* Return true if either T1 or T2 cannot be totally scalarized or if doing
+ so would result in copying the same memory. Otherwise return false. */
+ bool safe_for_total_scalarization_p (tree t1, tree t2);
+
/* Function responsible for comparison of various operands T1 and T2.
If these components, from functions FUNC1 and FUNC2, are equal, true
is returned. */
@@ -279,6 +284,14 @@ private:
/* Flag if we should compare type based alias analysis info. */
bool m_tbaa;
+ /* Set to true when total scalarization size has already been determined for
+ the functions. */
+ bool m_total_scalarization_limit_known_p;
+
+ /* When the above is set to true, the determined total scalarization
+ limit. */
+ unsigned HOST_WIDE_INT m_total_scalarization_limit;
+
public:
/* Return true if two operands are equal. The flags fields can be used
to specify OEP flags described above. */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c
new file mode 100644
index 0000000..0b3ca68
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-1_0.c
@@ -0,0 +1,86 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SB
+{
+ unsigned long bx;
+ unsigned int by;
+ unsigned long bz;
+};
+
+struct ZA
+{
+ int p;
+ struct SA s;
+ short q;
+};
+
+struct ZB
+{
+ int p;
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+ struct SA tmp = p->s;
+ *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+ ga.s.ax = CI;
+ ga.s.ay = CL1;
+ ga.s.az = CL2;
+
+ gb.s.bx = CL1;
+ gb.s.by = CI;
+ gb.s.bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+ init();
+ struct SA a;
+ geta (&a, &ga);
+
+ if (a.ax != CI)
+ __builtin_abort ();
+ if (a.ay != CL1)
+ __builtin_abort ();
+ if (a.az != CL2)
+ __builtin_abort ();
+
+ struct SB b;
+ getb (&b, &gb);
+
+ if (b.bx != CL1)
+ __builtin_abort ();
+ if (b.by != CI)
+ __builtin_abort ();
+ if (b.bz != CL2)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c
new file mode 100644
index 0000000..f51b84b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-1_1.c
@@ -0,0 +1,38 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SB
+{
+ unsigned long bx;
+ unsigned int by;
+ unsigned long bz;
+};
+
+struct ZA
+{
+ int p;
+ struct SA s;
+ short q;
+};
+
+struct ZB
+{
+ int p;
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+ struct SB tmp = p->s;
+ *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c
new file mode 100644
index 0000000..8b2d5bd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-2_0.c
@@ -0,0 +1,87 @@
+/* { dg-lto-do run } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra -fdump-ipa-icf-details }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SB
+{
+ unsigned int bx;
+ unsigned long by;
+ unsigned long bz;
+};
+
+struct ZA
+{
+ int p;
+ struct SA s;
+ short q;
+};
+
+struct ZB
+{
+ int p;
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+ struct SA tmp = p->s;
+ *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+ ga.s.ax = CI;
+ ga.s.ay = CL1;
+ ga.s.az = CL2;
+
+ gb.s.bx = CI;
+ gb.s.by = CL1;
+ gb.s.bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+ init();
+ struct SA a;
+ geta (&a, &ga);
+
+ if (a.ax != CI)
+ __builtin_abort ();
+ if (a.ay != CL1)
+ __builtin_abort ();
+ if (a.az != CL2)
+ __builtin_abort ();
+
+ struct SB b;
+ getb (&b, &gb);
+
+ if (b.bx != CI)
+ __builtin_abort ();
+ if (b.by != CL1)
+ __builtin_abort ();
+ if (b.bz != CL2)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c
new file mode 100644
index 0000000..61bc054
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-2_1.c
@@ -0,0 +1,38 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct SA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SB
+{
+ unsigned int bx;
+ unsigned long by;
+ unsigned long bz;
+};
+
+struct ZA
+{
+ int p;
+ struct SA s;
+ short q;
+};
+
+struct ZB
+{
+ int p;
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+ struct SB tmp = p->s;
+ *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c
new file mode 100644
index 0000000..f74819b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-3_0.c
@@ -0,0 +1,114 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned long bx;
+ unsigned int by;
+ unsigned long bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+ struct SA tmp = p->s;
+ *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+ ga.s.arr[0].ax = CI;
+ ga.s.arr[0].ay = CL1;
+ ga.s.arr[0].az = CL2;
+ ga.s.arr[1].ax = CI;
+ ga.s.arr[1].ay = CL1;
+ ga.s.arr[1].az = CL2;
+
+ gb.s.arr[0].bx = CL1;
+ gb.s.arr[0].by = CI;
+ gb.s.arr[0].bz = CL2;
+ gb.s.arr[1].bx = CL1;
+ gb.s.arr[1].by = CI;
+ gb.s.arr[1].bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+ init();
+ struct SA a;
+ geta (&a, &ga);
+
+ if (a.arr[0].ax != CI)
+ __builtin_abort ();
+ if (a.arr[0].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[0].az != CL2)
+ __builtin_abort ();
+ if (a.arr[1].ax != CI)
+ __builtin_abort ();
+ if (a.arr[1].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[1].az != CL2)
+ __builtin_abort ();
+
+ struct SB b;
+ getb (&b, &gb);
+
+ if (b.arr[0].bx != CL1)
+ __builtin_abort ();
+ if (b.arr[0].by != CI)
+ __builtin_abort ();
+ if (b.arr[0].bz != CL2)
+ __builtin_abort ();
+ if (b.arr[1].bx != CL1)
+ __builtin_abort ();
+ if (b.arr[1].by != CI)
+ __builtin_abort ();
+ if (b.arr[1].bz != CL2)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c
new file mode 100644
index 0000000..25a6a0a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-3_1.c
@@ -0,0 +1,49 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned long bx;
+ unsigned int by;
+ unsigned long bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+ struct SB tmp = p->s;
+ *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c
new file mode 100644
index 0000000..170c8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-4_0.c
@@ -0,0 +1,114 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned long bx;
+ unsigned long by;
+ unsigned int bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+ struct SA tmp = p->s;
+ *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+ ga.s.arr[0].ax = CI;
+ ga.s.arr[0].ay = CL1;
+ ga.s.arr[0].az = CL2;
+ ga.s.arr[1].ax = CI;
+ ga.s.arr[1].ay = CL1;
+ ga.s.arr[1].az = CL2;
+
+ gb.s.arr[0].bx = CL1;
+ gb.s.arr[0].by = CL2;
+ gb.s.arr[0].bz = CI;
+ gb.s.arr[1].bx = CL1;
+ gb.s.arr[1].by = CL2;
+ gb.s.arr[1].bz = CI;
+}
+
+int
+main (int argc, char **argv)
+{
+ init();
+ struct SA a;
+ geta (&a, &ga);
+
+ if (a.arr[0].ax != CI)
+ __builtin_abort ();
+ if (a.arr[0].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[0].az != CL2)
+ __builtin_abort ();
+ if (a.arr[1].ax != CI)
+ __builtin_abort ();
+ if (a.arr[1].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[1].az != CL2)
+ __builtin_abort ();
+
+ struct SB b;
+ getb (&b, &gb);
+
+ if (b.arr[0].bx != CL1)
+ __builtin_abort ();
+ if (b.arr[0].by != CL2)
+ __builtin_abort ();
+ if (b.arr[0].bz != CI)
+ __builtin_abort ();
+ if (b.arr[1].bx != CL1)
+ __builtin_abort ();
+ if (b.arr[1].by != CL2)
+ __builtin_abort ();
+ if (b.arr[1].bz != CI)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c
new file mode 100644
index 0000000..7659810
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-4_1.c
@@ -0,0 +1,49 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned long bx;
+ unsigned long by;
+ unsigned int bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+ struct SB tmp = p->s;
+ *d = tmp;
+}
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c
new file mode 100644
index 0000000..5ad457f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-5_0.c
@@ -0,0 +1,118 @@
+/* { dg-lto-do run } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-lto-options {{-O2 -flto -fno-strict-aliasing -fno-ipa-cp --disable-tree-esra -fdump-ipa-icf-details }} } */
+
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+ short ee;
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned int bx;
+ unsigned long by;
+ unsigned long bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+ short ee;
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+geta (struct SA *d, struct ZA *p)
+{
+ struct SA tmp = p->s;
+ *d = tmp;
+}
+
+void getb (struct SB *d, struct ZB *p);
+
+struct ZA ga;
+struct ZB gb;
+
+void __attribute__((noipa))
+init (void)
+{
+ ga.s.arr[0].ax = CI;
+ ga.s.arr[0].ay = CL1;
+ ga.s.arr[0].az = CL2;
+ ga.s.arr[1].ax = CI;
+ ga.s.arr[1].ay = CL1;
+ ga.s.arr[1].az = CL2;
+
+ gb.s.arr[0].bx = CI;
+ gb.s.arr[0].by = CL1;
+ gb.s.arr[0].bz = CL2;
+ gb.s.arr[1].bx = CI;
+ gb.s.arr[1].by = CL1;
+ gb.s.arr[1].bz = CL2;
+}
+
+int
+main (int argc, char **argv)
+{
+ init();
+ struct SA a;
+ geta (&a, &ga);
+
+ if (a.arr[0].ax != CI)
+ __builtin_abort ();
+ if (a.arr[0].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[0].az != CL2)
+ __builtin_abort ();
+ if (a.arr[1].ax != CI)
+ __builtin_abort ();
+ if (a.arr[1].ay != CL1)
+ __builtin_abort ();
+ if (a.arr[1].az != CL2)
+ __builtin_abort ();
+
+ struct SB b;
+ getb (&b, &gb);
+
+ if (b.arr[0].bx != CI)
+ __builtin_abort ();
+ if (b.arr[0].by != CL1)
+ __builtin_abort ();
+ if (b.arr[0].bz != CL2)
+ __builtin_abort ();
+ if (b.arr[1].bx != CI)
+ __builtin_abort ();
+ if (b.arr[1].by != CL1)
+ __builtin_abort ();
+ if (b.arr[1].bz != CL2)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-wpa-ipa-dump "Semantic equality hit:geta/.*getb/" "icf" } } */
diff --git a/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c b/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c
new file mode 100644
index 0000000..128b23c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/lto/pr113359-5_1.c
@@ -0,0 +1,50 @@
+#define CI 0xdeadbeef
+#define CL1 0xdeaddead1234beef
+#define CL2 0xdead1234deadbeef
+
+struct AA
+{
+ unsigned int ax;
+ unsigned long ay;
+ unsigned long az;
+};
+
+struct SA
+{
+ int p;
+ struct AA arr[2];
+ short ee;
+};
+
+struct ZA
+{
+ struct SA s;
+ short q;
+};
+
+struct AB
+{
+ unsigned int bx;
+ unsigned long by;
+ unsigned long bz;
+};
+
+struct SB
+{
+ int p;
+ struct AB arr[2];
+ short ee;
+};
+
+struct ZB
+{
+ struct SB s;
+ short q;
+};
+
+void __attribute__((noinline))
+getb (struct SB *d, struct ZB *p)
+{
+ struct SB tmp = p->s;
+ *d = tmp;
+}
diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc
index dbfae5e..32fa289 100644
--- a/gcc/tree-sra.cc
+++ b/gcc/tree-sra.cc
@@ -985,18 +985,101 @@ create_access (tree expr, gimple *stmt, bool write)
return access;
}
+/* Given an array type TYPE, extract element size to *EL_SIZE, minimum index to
+ *IDX and maximum index to *MAX so that the caller can iterate over all
+ elements and return true, except if the array is known to be zero-length,
+ then return false. */
-/* Return true iff TYPE is scalarizable - i.e. a RECORD_TYPE or fixed-length
- ARRAY_TYPE with fields that are either of gimple register types (excluding
- bit-fields) or (recursively) scalarizable types. CONST_DECL must be true if
- we are considering a decl from constant pool. If it is false, char arrays
- will be refused. */
+static bool
+prepare_iteration_over_array_elts (tree type, HOST_WIDE_INT *el_size,
+ offset_int *idx, offset_int *max)
+{
+ tree elem_size = TYPE_SIZE (TREE_TYPE (type));
+ gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
+ *el_size = tree_to_shwi (elem_size);
+ gcc_assert (*el_size > 0);
+
+ tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
+ gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
+ tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
+ /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */
+ if (!maxidx)
+ return false;
+ gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
+ tree domain = TYPE_DOMAIN (type);
+ /* MINIDX and MAXIDX are inclusive, and must be interpreted in
+ DOMAIN (e.g. signed int, whereas min/max may be size_int). */
+ *idx = wi::to_offset (minidx);
+ *max = wi::to_offset (maxidx);
+ if (!TYPE_UNSIGNED (domain))
+ {
+ *idx = wi::sext (*idx, TYPE_PRECISION (domain));
+ *max = wi::sext (*max, TYPE_PRECISION (domain));
+ }
+ return true;
+}
+
+/* A structure to track collecting padding and hold collected padding
+ information. */
+
+class sra_padding_collecting
+{
+public:
+ /* Given that there won't be any data until at least OFFSET, add an
+ appropriate entry to the list of paddings or extend the last one. */
+ void record_padding (HOST_WIDE_INT offset);
+ /* Vector of pairs describing contiguous pieces of padding, each pair
+ consisting of offset and length. */
+ auto_vec<std::pair<HOST_WIDE_INT, HOST_WIDE_INT>, 10> m_padding;
+ /* Offset where data should continue after the last seen actual bit of data
+ if there was no padding. */
+ HOST_WIDE_INT m_data_until = 0;
+};
+
+/* Given that there won't be any data until at least OFFSET, add an appropriate
+ entry to the list of paddings or extend the last one. */
+
+void sra_padding_collecting::record_padding (HOST_WIDE_INT offset)
+{
+ if (offset > m_data_until)
+ {
+ HOST_WIDE_INT psz = offset - m_data_until;
+ if (!m_padding.is_empty ()
+ && ((m_padding[m_padding.length () - 1].first
+ + m_padding[m_padding.length () - 1].second) == offset))
+ m_padding[m_padding.length () - 1].second += psz;
+ else
+ m_padding.safe_push (std::make_pair (m_data_until, psz));
+ }
+}
+
+/* Return true iff TYPE is totally scalarizable - i.e. a RECORD_TYPE or
+ fixed-length ARRAY_TYPE with fields that are either of gimple register types
+ (excluding bit-fields) or (recursively) scalarizable types. CONST_DECL must
+ be true if we are considering a decl from constant pool. If it is false,
+ char arrays will be refused.
+
+ TOTAL_OFFSET is the offset of TYPE within any outer type that is being
+ examined.
+
+ If PC is non-NULL, collect padding information into the vector within the
+ structure. The information is however only complete if the function returns
+ true and TYPE does not contain any padding at its end. */
static bool
-scalarizable_type_p (tree type, bool const_decl)
+totally_scalarizable_type_p (tree type, bool const_decl,
+ HOST_WIDE_INT total_offset,
+ sra_padding_collecting *pc)
{
if (is_gimple_reg_type (type))
- return true;
+ {
+ if (pc)
+ {
+ pc->record_padding (total_offset);
+ pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
+ }
+ return true;
+ }
if (type_contains_placeholder_p (type))
return false;
@@ -1011,6 +1094,8 @@ scalarizable_type_p (tree type, bool const_decl)
{
tree ft = TREE_TYPE (fld);
+ if (!DECL_SIZE (fld))
+ return false;
if (zerop (DECL_SIZE (fld)))
continue;
@@ -1025,7 +1110,8 @@ scalarizable_type_p (tree type, bool const_decl)
if (DECL_BIT_FIELD (fld))
return false;
- if (!scalarizable_type_p (ft, const_decl))
+ if (!totally_scalarizable_type_p (ft, const_decl, total_offset + pos,
+ pc))
return false;
}
@@ -1054,9 +1140,35 @@ scalarizable_type_p (tree type, bool const_decl)
/* Variable-length array, do not allow scalarization. */
return false;
+ unsigned old_padding_len = 0;
+ if (pc)
+ old_padding_len = pc->m_padding.length ();
tree elem = TREE_TYPE (type);
- if (!scalarizable_type_p (elem, const_decl))
+ if (!totally_scalarizable_type_p (elem, const_decl, total_offset, pc))
return false;
+ if (pc)
+ {
+ unsigned new_padding_len = pc->m_padding.length ();
+ HOST_WIDE_INT el_size;
+ offset_int idx, max;
+ if (!prepare_iteration_over_array_elts (type, &el_size, &idx, &max))
+ return true;
+ pc->record_padding (total_offset + el_size);
+ ++idx;
+ for (HOST_WIDE_INT pos = total_offset + el_size;
+ idx <= max;
+ pos += el_size, ++idx)
+ {
+ for (unsigned i = old_padding_len; i < new_padding_len; i++)
+ {
+ HOST_WIDE_INT pp
+ = pos + pc->m_padding[i].first - total_offset;
+ HOST_WIDE_INT psz = pc->m_padding[i].second;
+ pc->m_padding.safe_push (std::make_pair (pp, psz));
+ }
+ }
+ pc->m_data_until = total_offset + tree_to_shwi (TYPE_SIZE (type));
+ }
return true;
}
default:
@@ -3540,28 +3652,12 @@ totally_scalarize_subtree (struct access *root)
case ARRAY_TYPE:
{
tree elemtype = TREE_TYPE (root->type);
- tree elem_size = TYPE_SIZE (elemtype);
- gcc_assert (elem_size && tree_fits_shwi_p (elem_size));
- HOST_WIDE_INT el_size = tree_to_shwi (elem_size);
- gcc_assert (el_size > 0);
-
- tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type));
- gcc_assert (TREE_CODE (minidx) == INTEGER_CST);
- tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type));
- /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */
- if (!maxidx)
- goto out;
- gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
- tree domain = TYPE_DOMAIN (root->type);
- /* MINIDX and MAXIDX are inclusive, and must be interpreted in
- DOMAIN (e.g. signed int, whereas min/max may be size_int). */
- offset_int idx = wi::to_offset (minidx);
- offset_int max = wi::to_offset (maxidx);
- if (!TYPE_UNSIGNED (domain))
- {
- idx = wi::sext (idx, TYPE_PRECISION (domain));
- max = wi::sext (max, TYPE_PRECISION (domain));
- }
+ HOST_WIDE_INT el_size;
+ offset_int idx, max;
+ if (!prepare_iteration_over_array_elts (root->type, &el_size,
+ &idx, &max))
+ break;
+
for (HOST_WIDE_INT pos = root->offset;
idx <= max;
pos += el_size, ++idx)
@@ -3587,7 +3683,8 @@ totally_scalarize_subtree (struct access *root)
? &last_seen_sibling->next_sibling
: &root->first_child);
tree nref = build4 (ARRAY_REF, elemtype, root->expr,
- wide_int_to_tree (domain, idx),
+ wide_int_to_tree (TYPE_DOMAIN (root->type),
+ idx),
NULL_TREE, NULL_TREE);
struct access *new_child
= create_total_access_and_reshape (root, pos, el_size, elemtype,
@@ -3605,11 +3702,34 @@ totally_scalarize_subtree (struct access *root)
default:
gcc_unreachable ();
}
-
- out:
return true;
}
+/* Get the total scalarization size limit in the current function. */
+
+unsigned HOST_WIDE_INT
+sra_get_max_scalarization_size (void)
+{
+ bool optimize_speed_p = !optimize_function_for_size_p (cfun);
+ /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
+ fall back to a target default. */
+ unsigned HOST_WIDE_INT max_scalarization_size
+ = get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
+
+ if (optimize_speed_p)
+ {
+ if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
+ max_scalarization_size = param_sra_max_scalarization_size_speed;
+ }
+ else
+ {
+ if (OPTION_SET_P (param_sra_max_scalarization_size_size))
+ max_scalarization_size = param_sra_max_scalarization_size_size;
+ }
+ max_scalarization_size *= BITS_PER_UNIT;
+ return max_scalarization_size;
+}
+
/* Go through all accesses collected throughout the (intraprocedural) analysis
stage, exclude overlapping ones, identify representatives and build trees
out of them, making decisions about scalarization on the way. Return true
@@ -3637,24 +3757,8 @@ analyze_all_variable_accesses (void)
propagate_all_subaccesses ();
- bool optimize_speed_p = !optimize_function_for_size_p (cfun);
- /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
- fall back to a target default. */
unsigned HOST_WIDE_INT max_scalarization_size
- = get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
-
- if (optimize_speed_p)
- {
- if (OPTION_SET_P (param_sra_max_scalarization_size_speed))
- max_scalarization_size = param_sra_max_scalarization_size_speed;
- }
- else
- {
- if (OPTION_SET_P (param_sra_max_scalarization_size_size))
- max_scalarization_size = param_sra_max_scalarization_size_size;
- }
- max_scalarization_size *= BITS_PER_UNIT;
-
+ = sra_get_max_scalarization_size ();
EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
if (bitmap_bit_p (should_scalarize_away_bitmap, i)
&& !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
@@ -3679,7 +3783,9 @@ analyze_all_variable_accesses (void)
access;
access = access->next_grp)
if (!can_totally_scalarize_forest_p (access)
- || !scalarizable_type_p (access->type, constant_decl_p (var)))
+ || !totally_scalarizable_type_p (access->type,
+ constant_decl_p (var),
+ 0, nullptr))
{
all_types_ok = false;
break;
@@ -5100,3 +5206,45 @@ make_pass_sra (gcc::context *ctxt)
{
return new pass_sra (ctxt);
}
+
+
+/* If type T cannot be totally scalarized, return false. Otherwise return true
+ and push to the vector within PC offsets and lengths of all padding in the
+ type as total scalarization would encounter it. */
+
+static bool
+check_ts_and_push_padding_to_vec (tree type, sra_padding_collecting *pc)
+{
+ if (!totally_scalarizable_type_p (type, true /* optimistic value */,
+ 0, pc))
+ return false;
+
+ pc->record_padding (tree_to_shwi (TYPE_SIZE (type)));
+ return true;
+}
+
+/* Given two types in an assignment, return true either if any one cannot be
+ totally scalarized or if they have the same padding (i.e. not copied bits). */
+
+bool
+sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2)
+{
+ sra_padding_collecting p1;
+ if (!check_ts_and_push_padding_to_vec (t1, &p1))
+ return true;
+
+ sra_padding_collecting p2;
+ if (!check_ts_and_push_padding_to_vec (t2, &p2))
+ return true;
+
+ unsigned l = p1.m_padding.length ();
+ if (l != p2.m_padding.length ())
+ return false;
+ for (unsigned i = 0; i < l; i++)
+ if (p1.m_padding[i].first != p2.m_padding[i].first
+ || p1.m_padding[i].second != p2.m_padding[i].second)
+ return false;
+
+ return true;
+}
+
diff --git a/gcc/tree-sra.h b/gcc/tree-sra.h
index 3a12870..a4aab21 100644
--- a/gcc/tree-sra.h
+++ b/gcc/tree-sra.h
@@ -20,6 +20,9 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
bool type_internals_preclude_sra_p (tree type, const char **msg);
+unsigned HOST_WIDE_INT sra_get_max_scalarization_size (void);
+bool sra_total_scalarization_would_copy_same_data_p (tree t1, tree t2);
+
/* Return true iff TYPE is stdarg va_list type (which early SRA and IPA-SRA
should leave alone). */