From 5b9e89c922dc2e7e8b8da644bd3a8917c16b22ac Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 29 Jan 2020 13:13:12 +0100 Subject: SRA: Add verification of accesses 2020-01-29 Martin Jambor * tree-sra.c (verify_sra_access_forest): New function. (verify_all_sra_access_forests): Likewise. (create_artificial_child_access): Set parent. (analyze_all_variable_accesses): Call the verifier. --- gcc/tree-sra.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 875d5b2..36106fe 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -2321,6 +2321,88 @@ build_access_trees (struct access *access) return true; } +/* Traverse the access forest where ROOT is the first root and verify that + various important invariants hold true. */ + +DEBUG_FUNCTION void +verify_sra_access_forest (struct access *root) +{ + struct access *access = root; + tree first_base = root->base; + gcc_assert (DECL_P (first_base)); + do + { + gcc_assert (access->base == first_base); + if (access->parent) + gcc_assert (access->offset >= access->parent->offset + && access->size <= access->parent->size); + if (access->next_sibling) + gcc_assert (access->next_sibling->offset + >= access->offset + access->size); + + poly_int64 poffset, psize, pmax_size; + bool reverse; + tree base = get_ref_base_and_extent (access->expr, &poffset, &psize, + &pmax_size, &reverse); + HOST_WIDE_INT offset, size, max_size; + if (!poffset.is_constant (&offset) + || !psize.is_constant (&size) + || !pmax_size.is_constant (&max_size)) + gcc_unreachable (); + gcc_assert (base == first_base); + gcc_assert (offset == access->offset); + gcc_assert (access->grp_unscalarizable_region + || size == max_size); + gcc_assert (max_size == access->size); + gcc_assert (reverse == access->reverse); + + if (access->first_child) + { + gcc_assert (access->first_child->parent == access); + access = access->first_child; + } + else if (access->next_sibling) + { + gcc_assert (access->next_sibling->parent == access->parent); + access = access->next_sibling; + } + else + { + while (access->parent && !access->next_sibling) + access = access->parent; + if (access->next_sibling) + access = access->next_sibling; + else + { + gcc_assert (access == root); + root = root->next_grp; + access = root; + } + } + } + while (access); +} + +/* Verify access forests of all candidates with accesses by calling + verify_sra_access_forest on each of them. */ + +DEBUG_FUNCTION void +verify_all_sra_access_forests (void) +{ + bitmap_iterator bi; + unsigned i; + EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi) + { + tree var = candidate (i); + struct access *access = get_first_repr_for_decl (var); + if (access) + { + gcc_assert (access->base == var); + verify_sra_access_forest (access); + } + } +} + /* Return true if expr contains some ARRAY_REFs into a variable bounded array.
*/ @@ -2566,6 +2648,7 @@ create_artificial_child_access (struct access *parent, struct access *model, access->offset = new_offset; access->size = model->size; access->type = model->type; + access->parent = parent; access->grp_write = set_grp_write; access->grp_read = false; access->reverse = model->reverse; @@ -2850,6 +2933,9 @@ analyze_all_variable_accesses (void) propagate_all_subaccesses (); + if (flag_checking) + verify_all_sra_access_forests (); + bitmap_copy (tmp, candidate_bitmap); EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) { -- cgit v1.1 From 636e80eea24b780f1d5f4c14c58fc00001df8508 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 29 Jan 2020 13:13:13 +0100 Subject: SRA: Total scalarization after access propagation [PR92706] 2020-01-29 Martin Jambor PR tree-optimization/92706 * tree-sra.c (struct access): Adjust comment of grp_total_scalarization. (find_access_in_subtree): Look for single children spanning an entire access. (scalarizable_type_p): Allow register accesses, adjust callers. (completely_scalarize): Remove function. (scalarize_elem): Likewise. (create_total_scalarization_access): Likewise. (sort_and_splice_var_accesses): Do not track total scalarization flags. (analyze_access_subtree): New parameter totally, adjust to new meaning of grp_total_scalarization. (analyze_access_trees): Pass new parameter to analyze_access_subtree. (can_totally_scalarize_forest_p): New function. (create_total_scalarization_access): Likewise. (create_total_access_and_reshape): Likewise. (total_should_skip_creating_access): Likewise. (totally_scalarize_subtree): Likewise. (analyze_all_variable_accesses): Perform total scalarization after subaccess propagation using the new functions above. (initialize_constant_pool_replacements): Output initializers by traversing the access tree. testsuite/ * gcc.dg/tree-ssa/pr92706-2.c: New test. * gcc.dg/guality/pr59776.c: Xfail tests for s2.g. --- gcc/tree-sra.c | 666 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 484 insertions(+), 182 deletions(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 36106fe..2b08498 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -211,8 +211,11 @@ struct access is not propagated in the access tree in any direction. */ unsigned grp_scalar_write : 1; - /* Is this access an artificial one created to scalarize some record - entirely? */ + /* In a root of an access tree, true means that the entire tree should be + totally scalarized - that all scalar leaves should be scalarized and + non-root grp_total_scalarization accesses should be honored. Otherwise, + non-root accesses with grp_total_scalarization should never get scalar + replacements. */ unsigned grp_total_scalarization : 1; /* Other passes of the analysis use this bit to make function @@ -485,6 +488,15 @@ find_access_in_subtree (struct access *access, HOST_WIDE_INT offset, access = child; } + /* Total scalarization does not replace single field structures with their + single field but rather creates an access for them underneath. Look for + it.
*/ + if (access) + while (access->first_child + && access->first_child->offset == offset + && access->first_child->size == size) + access = access->first_child; + return access; } @@ -856,7 +868,8 @@ create_access (tree expr, gimple *stmt, bool write) static bool scalarizable_type_p (tree type, bool const_decl) { - gcc_assert (!is_gimple_reg_type (type)); + if (is_gimple_reg_type (type)) + return true; if (type_contains_placeholder_p (type)) return false; @@ -871,8 +884,7 @@ scalarizable_type_p (tree type, bool const_decl) if (DECL_BIT_FIELD (fld)) return false; - if (!is_gimple_reg_type (ft) - && !scalarizable_type_p (ft, const_decl)) + if (!scalarizable_type_p (ft, const_decl)) return false; } @@ -902,8 +914,7 @@ scalarizable_type_p (tree type, bool const_decl) return false; tree elem = TREE_TYPE (type); - if (!is_gimple_reg_type (elem) - && !scalarizable_type_p (elem, const_decl)) + if (!scalarizable_type_p (elem, const_decl)) return false; return true; } @@ -912,114 +923,6 @@ scalarizable_type_p (tree type, bool const_decl) } } -static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, bool, tree, tree); - -/* Create total_scalarization accesses for all scalar fields of a member - of type DECL_TYPE conforming to scalarizable_type_p. BASE - must be the top-most VAR_DECL representing the variable; within that, - OFFSET locates the member and REF must be the memory reference expression for - the member. */ - -static void -completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref) -{ - switch (TREE_CODE (decl_type)) - { - case RECORD_TYPE: - for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld)) - if (TREE_CODE (fld) == FIELD_DECL) - { - HOST_WIDE_INT pos = offset + int_bit_position (fld); - tree ft = TREE_TYPE (fld); - tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE); - - scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), - TYPE_REVERSE_STORAGE_ORDER (decl_type), - nref, ft); - } - break; - case ARRAY_TYPE: - { - tree elemtype = TREE_TYPE (decl_type); - tree elem_size = TYPE_SIZE (elemtype); - gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); - HOST_WIDE_INT el_size = tree_to_shwi (elem_size); - gcc_assert (el_size > 0); - - tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type)); - gcc_assert (TREE_CODE (minidx) == INTEGER_CST); - tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type)); - /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ - if (maxidx) - { - gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); - tree domain = TYPE_DOMAIN (decl_type); - /* MINIDX and MAXIDX are inclusive, and must be interpreted in - DOMAIN (e.g. signed int, whereas min/max may be size_int). */ - offset_int idx = wi::to_offset (minidx); - offset_int max = wi::to_offset (maxidx); - if (!TYPE_UNSIGNED (domain)) - { - idx = wi::sext (idx, TYPE_PRECISION (domain)); - max = wi::sext (max, TYPE_PRECISION (domain)); - } - for (int el_off = offset; idx <= max; ++idx) - { - tree nref = build4 (ARRAY_REF, elemtype, - ref, - wide_int_to_tree (domain, idx), - NULL_TREE, NULL_TREE); - scalarize_elem (base, el_off, el_size, - TYPE_REVERSE_STORAGE_ORDER (decl_type), - nref, elemtype); - el_off += el_size; - } - } - } - break; - default: - gcc_unreachable (); - } -} - -/* Create total_scalarization accesses for a member of type TYPE, which must - satisfy either is_gimple_reg_type or scalarizable_type_p. 
BASE must be the - top-most VAR_DECL representing the variable; within that, POS and SIZE locate - the member, REVERSE gives its storage order, and REF must be the reference - expression for it. */ - -static void -scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size, bool reverse, - tree ref, tree type) -{ - if (is_gimple_reg_type (type)) - { - struct access *access = create_access_1 (base, pos, size); - access->expr = ref; - access->type = type; - access->grp_total_scalarization = 1; - access->reverse = reverse; - /* Accesses for intraprocedural SRA can have their stmt NULL. */ - } - else - completely_scalarize (base, type, pos, ref); -} - -/* Create a total_scalarization access for VAR as a whole. VAR must be of a - RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p. */ - -static void -create_total_scalarization_access (tree var) -{ - HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE (var)); - struct access *access; - - access = create_access_1 (var, 0, size); - access->expr = var; - access->type = TREE_TYPE (var); - access->grp_total_scalarization = 1; -} - /* Return true if REF has a VIEW_CONVERT_EXPR somewhere in it. */ static inline bool @@ -2029,7 +1932,6 @@ sort_and_splice_var_accesses (tree var) bool grp_assignment_read = access->grp_assignment_read; bool grp_assignment_write = access->grp_assignment_write; bool multiple_scalar_reads = false; - bool total_scalarization = access->grp_total_scalarization; bool grp_partial_lhs = access->grp_partial_lhs; bool first_scalar = is_gimple_reg_type (access->type); bool unscalarizable_region = access->grp_unscalarizable_region; @@ -2081,7 +1983,6 @@ sort_and_splice_var_accesses (tree var) grp_assignment_write |= ac2->grp_assignment_write; grp_partial_lhs |= ac2->grp_partial_lhs; unscalarizable_region |= ac2->grp_unscalarizable_region; - total_scalarization |= ac2->grp_total_scalarization; relink_to_new_repr (access, ac2); /* If there are both aggregate-type and scalar-type accesses with @@ -2122,9 +2023,7 @@ sort_and_splice_var_accesses (tree var) access->grp_scalar_write = grp_scalar_write; access->grp_assignment_read = grp_assignment_read; access->grp_assignment_write = grp_assignment_write; - access->grp_hint = total_scalarization - || (multiple_scalar_reads && !constant_decl_p (var)); - access->grp_total_scalarization = total_scalarization; + access->grp_hint = multiple_scalar_reads && !constant_decl_p (var); access->grp_partial_lhs = grp_partial_lhs; access->grp_unscalarizable_region = unscalarizable_region; access->grp_same_access_path = grp_same_access_path; @@ -2420,15 +2319,16 @@ expr_with_var_bounded_array_refs_p (tree expr) } /* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when - both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set all - sorts of access flags appropriately along the way, notably always set - grp_read and grp_assign_read according to MARK_READ and grp_write when - MARK_WRITE is true. + both seeming beneficial and when ALLOW_REPLACEMENTS allows it. If TOTALLY + is set, we are totally scalarizing the aggregate. Also set all sorts of + access flags appropriately along the way, notably always set grp_read and + grp_assign_read according to MARK_READ and grp_write when MARK_WRITE is + true.
Creating a replacement for a scalar access is considered beneficial if its - grp_hint is set (this means we are either attempting total scalarization or - there is more than one direct read access) or according to the following - table: + grp_hint or TOTALLY is set (this means either that there is more than one + direct read access or that we are attempting total scalarization) or + according to the following table: Access written to through a scalar type (once or more times) | | Written to in an assignment statement | | | | Access read as scalar _once_ | | | | | | | Read in an assignment statement | | | | | | | | | | | | Scalarize Comment @@ -2459,7 +2359,7 @@ expr_with_var_bounded_array_refs_p (tree expr) static bool analyze_access_subtree (struct access *root, struct access *parent, - bool allow_replacements) + bool allow_replacements, bool totally) { struct access *child; HOST_WIDE_INT limit = root->offset + root->size; @@ -2477,8 +2377,6 @@ analyze_access_subtree (struct access *root, struct access *parent, root->grp_write = 1; if (parent->grp_assignment_write) root->grp_assignment_write = 1; - if (parent->grp_total_scalarization) - root->grp_total_scalarization = 1; if (!parent->grp_same_access_path) root->grp_same_access_path = 0; } @@ -2493,10 +2391,10 @@ analyze_access_subtree (struct access *root, struct access *parent, { hole |= covered_to < child->offset; sth_created |= analyze_access_subtree (child, root, - allow_replacements && !scalar); + allow_replacements && !scalar, + totally); root->grp_unscalarized_data |= child->grp_unscalarized_data; - root->grp_total_scalarization &= child->grp_total_scalarization; if (child->grp_covered) covered_to += child->size; else @@ -2504,7 +2402,9 @@ analyze_access_subtree (struct access *root, struct access *parent, } if (allow_replacements && scalar && !root->first_child - && (root->grp_hint + && (totally || !root->grp_total_scalarization) + && (totally + || root->grp_hint || ((root->grp_scalar_read || root->grp_assignment_read) && (root->grp_scalar_write || root->grp_assignment_write)))) { @@ -2546,6 +2446,7 @@ { if (allow_replacements && scalar && !root->first_child + && !root->grp_total_scalarization && (root->grp_scalar_write || root->grp_assignment_write) && !bitmap_bit_p (cannot_scalarize_away_bitmap, DECL_UID (root->base))) @@ -2566,7 +2467,7 @@ { root->grp_total_scalarization = 0; } - if (!hole || root->grp_total_scalarization) + if (!hole || totally) root->grp_covered = 1; else if (root->grp_write || comes_initialized_p (root->base)) root->grp_unscalarized_data = 1; /* not covered and written to */ @@ -2582,7 +2483,8 @@ analyze_access_trees (struct access *access) while (access) { - if (analyze_access_subtree (access, NULL, true)) + if (analyze_access_subtree (access, NULL, true, + access->grp_total_scalarization)) ret = true; access = access->next_grp; } @@ -2855,6 +2757,369 @@ propagate_all_subaccesses (void) } } +/* Return true if the forest beginning with ROOT does not contain + unscalarizable regions or non-byte aligned accesses.
*/ + +static bool +can_totally_scalarize_forest_p (struct access *root) +{ + struct access *access = root; + do + { + if (access->grp_unscalarizable_region + || (access->offset % BITS_PER_UNIT) != 0 + || (access->size % BITS_PER_UNIT) != 0 + || (is_gimple_reg_type (access->type) + && access->first_child)) + return false; + + if (access->first_child) + access = access->first_child; + else if (access->next_sibling) + access = access->next_sibling; + else + { + while (access->parent && !access->next_sibling) + access = access->parent; + if (access->next_sibling) + access = access->next_sibling; + else + { + gcc_assert (access == root); + root = root->next_grp; + access = root; + } + } + } + while (access); + return true; +} + +/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and + reference EXPR for total scalarization purposes and mark it as such. Within + the children of PARENT, link it in between PTR and NEXT_SIBLING. */ + +static struct access * +create_total_scalarization_access (struct access *parent, HOST_WIDE_INT pos, + HOST_WIDE_INT size, tree type, tree expr, + struct access **ptr, + struct access *next_sibling) +{ + struct access *access = access_pool.allocate (); + memset (access, 0, sizeof (struct access)); + access->base = parent->base; + access->offset = pos; + access->size = size; + access->expr = expr; + access->type = type; + access->parent = parent; + access->grp_write = parent->grp_write; + access->grp_total_scalarization = 1; + access->grp_hint = 1; + access->grp_same_access_path = path_comparable_for_same_access (expr); + access->reverse = reverse_storage_order_for_component_p (expr); + + access->next_sibling = next_sibling; + *ptr = access; + return access; +} + +/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and + reference EXPR for total scalarization purposes and mark it as such, link it + at *PTR and reshape the tree so that those elements at *PTR and their + siblings which fall within the part described by POS and SIZE are moved to + be children of the new access. If a partial overlap is detected, return + NULL. */ + +static struct access * +create_total_access_and_reshape (struct access *parent, HOST_WIDE_INT pos, + HOST_WIDE_INT size, tree type, tree expr, + struct access **ptr) +{ + struct access **p = ptr; + + while (*p && (*p)->offset < pos + size) + { + if ((*p)->offset + (*p)->size > pos + size) + return NULL; + p = &(*p)->next_sibling; + } + + struct access *next_child = *ptr; + struct access *new_acc + = create_total_scalarization_access (parent, pos, size, type, expr, + ptr, *p); + if (p != ptr) + { + new_acc->first_child = next_child; + *p = NULL; + for (struct access *a = next_child; a; a = a->next_sibling) + a->parent = new_acc; + } + return new_acc; +} + +static bool totally_scalarize_subtree (struct access *root); + +/* Return true if INNER is either the same type as OUTER or if it is the type + of a record field in OUTER at offset zero, possibly in nested + sub-records. 
*/ + +static bool +access_and_field_type_match_p (tree outer, tree inner) +{ + if (TYPE_MAIN_VARIANT (outer) == TYPE_MAIN_VARIANT (inner)) + return true; + if (TREE_CODE (outer) != RECORD_TYPE) + return false; + tree fld = TYPE_FIELDS (outer); + while (fld) + { + if (TREE_CODE (fld) == FIELD_DECL) + { + if (!zerop (DECL_FIELD_OFFSET (fld))) + return false; + if (TYPE_MAIN_VARIANT (TREE_TYPE (fld)) == inner) + return true; + if (TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE) + fld = TYPE_FIELDS (TREE_TYPE (fld)); + else + return false; + } + else + fld = DECL_CHAIN (fld); + } + return false; +} + +/* Return type of total_should_skip_creating_access indicating whether a total + scalarization access for a field/element should be created, whether it + already exists or whether the entire total scalarization has to fail. */ + +enum total_sra_field_state {TOTAL_FLD_CREATE, TOTAL_FLD_DONE, TOTAL_FLD_FAILED}; + +/* Do all the necessary steps in total scalarization when a component with the + given TYPE at POS and with the given SIZE should be put into PARENT and + when we have processed all its siblings with smaller offsets up until and + including LAST_SEEN_SIBLING (which can be NULL). + + If some further siblings are to be skipped, set *LAST_SEEN_SIBLING as + appropriate. Return TOTAL_FLD_CREATE if the caller should carry on with + creating a new access, TOTAL_FLD_DONE if access or accesses capable of + representing the described part of the aggregate for the purposes of total + scalarization already exist or TOTAL_FLD_FAILED if there is a problem which + prevents total scalarization from happening at all. */ + +static enum total_sra_field_state +total_should_skip_creating_access (struct access *parent, + struct access **last_seen_sibling, + tree type, HOST_WIDE_INT pos, + HOST_WIDE_INT size) +{ + struct access *next_child; + if (!*last_seen_sibling) + next_child = parent->first_child; + else + next_child = (*last_seen_sibling)->next_sibling; + + /* First, traverse the chain of siblings until it points to an access with + offset at least equal to POS. Check all skipped accesses whether they + span the POS boundary and if so, return with a failure. */ + while (next_child && next_child->offset < pos) + { + if (next_child->offset + next_child->size > pos) + return TOTAL_FLD_FAILED; + *last_seen_sibling = next_child; + next_child = next_child->next_sibling; + } + + /* Now check whether next_child has exactly the right POS and SIZE and if so, + whether it can represent what we need and can be totally scalarized + itself. */ + if (next_child && next_child->offset == pos + && next_child->size == size) + { + if (!is_gimple_reg_type (next_child->type) + && (!access_and_field_type_match_p (type, next_child->type) + || !totally_scalarize_subtree (next_child))) + return TOTAL_FLD_FAILED; + + *last_seen_sibling = next_child; + return TOTAL_FLD_DONE; + } + + /* If the child we're looking at would partially overlap, we just cannot + totally scalarize. */ + if (next_child + && next_child->offset < pos + size + && next_child->offset + next_child->size > pos + size) + return TOTAL_FLD_FAILED; + + if (is_gimple_reg_type (type)) + { + /* We don't scalarize accesses that are children of other scalar type + accesses, so if we go on and create an access for a register type, + there should not be any pre-existing children. There are rare cases + where the requested type is a vector but we already have register + accesses for all its elements which is equally good. Detect that + situation or whether we need to bail out.
*/ + + HOST_WIDE_INT covered = pos; + bool skipping = false; + while (next_child + && next_child->offset + next_child->size <= pos + size) + { + if (next_child->offset != covered + || !is_gimple_reg_type (next_child->type)) + return TOTAL_FLD_FAILED; + + covered += next_child->size; + *last_seen_sibling = next_child; + next_child = next_child->next_sibling; + skipping = true; + } + + if (skipping) + { + if (covered != pos + size) + return TOTAL_FLD_FAILED; + else + return TOTAL_FLD_DONE; + } + } + + return TOTAL_FLD_CREATE; +} + +/* Go over sub-tree rooted in ROOT and attempt to create scalar accesses + spanning all uncovered areas covered by ROOT, return false if the attempt + failed. All created accesses will have grp_total_scalarization set (and + should be ignored if the function returns false). */ + +static bool +totally_scalarize_subtree (struct access *root) +{ + gcc_checking_assert (!root->grp_unscalarizable_region); + gcc_checking_assert (!is_gimple_reg_type (root->type)); + + struct access *last_seen_sibling = NULL; + + switch (TREE_CODE (root->type)) + { + case RECORD_TYPE: + for (tree fld = TYPE_FIELDS (root->type); fld; fld = DECL_CHAIN (fld)) + if (TREE_CODE (fld) == FIELD_DECL) + { + tree ft = TREE_TYPE (fld); + HOST_WIDE_INT fsize = tree_to_uhwi (DECL_SIZE (fld)); + if (!fsize) + continue; + + HOST_WIDE_INT pos = root->offset + int_bit_position (fld); + enum total_sra_field_state + state = total_should_skip_creating_access (root, + &last_seen_sibling, + ft, pos, fsize); + switch (state) + { + case TOTAL_FLD_FAILED: + return false; + case TOTAL_FLD_DONE: + continue; + case TOTAL_FLD_CREATE: + break; + default: + gcc_unreachable (); + } + + struct access **p = (last_seen_sibling + ? &last_seen_sibling->next_sibling + : &root->first_child); + tree nref = build3 (COMPONENT_REF, ft, root->expr, fld, NULL_TREE); + struct access *new_child + = create_total_access_and_reshape (root, pos, fsize, ft, nref, p); + if (!new_child) + return false; + + if (!is_gimple_reg_type (ft) + && !totally_scalarize_subtree (new_child)) + return false; + last_seen_sibling = new_child; + } + break; + case ARRAY_TYPE: + { + tree elemtype = TREE_TYPE (root->type); + tree elem_size = TYPE_SIZE (elemtype); + gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); + HOST_WIDE_INT el_size = tree_to_shwi (elem_size); + gcc_assert (el_size > 0); + + tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type)); + gcc_assert (TREE_CODE (minidx) == INTEGER_CST); + tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type)); + /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ + if (!maxidx) + goto out; + gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); + tree domain = TYPE_DOMAIN (root->type); + /* MINIDX and MAXIDX are inclusive, and must be interpreted in + DOMAIN (e.g. signed int, whereas min/max may be size_int). */ + offset_int idx = wi::to_offset (minidx); + offset_int max = wi::to_offset (maxidx); + if (!TYPE_UNSIGNED (domain)) + { + idx = wi::sext (idx, TYPE_PRECISION (domain)); + max = wi::sext (max, TYPE_PRECISION (domain)); + } + for (HOST_WIDE_INT pos = root->offset; + idx <= max; + pos += el_size, ++idx) + { + enum total_sra_field_state + state = total_should_skip_creating_access (root, + &last_seen_sibling, + elemtype, pos, + el_size); + switch (state) + { + case TOTAL_FLD_FAILED: + return false; + case TOTAL_FLD_DONE: + continue; + case TOTAL_FLD_CREATE: + break; + default: + gcc_unreachable (); + } + + struct access **p = (last_seen_sibling + ?
&last_seen_sibling->next_sibling + : &root->first_child); + tree nref = build4 (ARRAY_REF, elemtype, root->expr, + wide_int_to_tree (domain, idx), + NULL_TREE, NULL_TREE); + struct access *new_child + = create_total_access_and_reshape (root, pos, el_size, elemtype, + nref, p); + if (!new_child) + return false; + + if (!is_gimple_reg_type (elemtype) + && !totally_scalarize_subtree (new_child)) + return false; + last_seen_sibling = new_child; + } + } + break; + default: + gcc_unreachable (); + } + + out: + return true; +} + /* Go through all accesses collected throughout the (intraprocedural) analysis stage, exclude overlapping ones, identify representatives and build trees out of them, making decisions about scalarization on the way. Return true @@ -2867,8 +3132,22 @@ analyze_all_variable_accesses (void) bitmap tmp = BITMAP_ALLOC (NULL); bitmap_iterator bi; unsigned i; - bool optimize_speed_p = !optimize_function_for_size_p (cfun); + bitmap_copy (tmp, candidate_bitmap); + EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) + { + tree var = candidate (i); + struct access *access; + + access = sort_and_splice_var_accesses (var); + if (!access || !build_access_trees (access)) + disqualify_candidate (var, + "No or inhibitingly overlapping accesses."); + } + + propagate_all_subaccesses (); + + bool optimize_speed_p = !optimize_function_for_size_p (cfun); /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>, fall back to a target default. */ unsigned HOST_WIDE_INT max_scalarization_size @@ -2884,7 +3163,6 @@ analyze_all_variable_accesses (void) if (global_options_set.x_param_sra_max_scalarization_size_size) max_scalarization_size = param_sra_max_scalarization_size_size; } - max_scalarization_size *= BITS_PER_UNIT; EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi) @@ -2892,46 +3170,56 @@ analyze_all_variable_accesses (void) && !bitmap_bit_p (cannot_scalarize_away_bitmap, i)) { tree var = candidate (i); + if (!VAR_P (var)) + continue; - if (VAR_P (var) && scalarizable_type_p (TREE_TYPE (var), - constant_decl_p (var))) + if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) > max_scalarization_size) { - if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) - <= max_scalarization_size) - { - create_total_scalarization_access (var); - completely_scalarize (var, TREE_TYPE (var), 0, var); - statistics_counter_event (cfun, - "Totally-scalarized aggregates", 1); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Will attempt to totally scalarize "); - print_generic_expr (dump_file, var); - fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); - } - } - else if (dump_file && (dump_flags & TDF_DETAILS)) + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Too big to totally scalarize: "); print_generic_expr (dump_file, var); fprintf (dump_file, " (UID: %u)\n", DECL_UID (var)); } + continue; } - } - bitmap_copy (tmp, candidate_bitmap); - EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) - { - tree var = candidate (i); - struct access *access; + bool all_types_ok = true; + for (struct access *access = get_first_repr_for_decl (var); + access; + access = access->next_grp) + if (!can_totally_scalarize_forest_p (access) + || !scalarizable_type_p (access->type, constant_decl_p (var))) + { + all_types_ok = false; + break; + } + if (!all_types_ok) + continue; - access = sort_and_splice_var_accesses (var); - if (!access || !build_access_trees (access)) - disqualify_candidate (var, - "No or inhibitingly overlapping accesses."); - } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf 
(dump_file, "Will attempt to totally scalarize "); + print_generic_expr (dump_file, var); + fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); + } + bool scalarized = true; + for (struct access *access = get_first_repr_for_decl (var); + access; + access = access->next_grp) + if (!is_gimple_reg_type (access->type) + && !totally_scalarize_subtree (access)) + { + scalarized = false; + break; + } - propagate_all_subaccesses (); + if (scalarized) + for (struct access *access = get_first_repr_for_decl (var); + access; + access = access->next_grp) + access->grp_total_scalarization = true; + } if (flag_checking) verify_all_sra_access_forests (); @@ -3804,25 +4092,39 @@ initialize_constant_pool_replacements (void) tree var = candidate (i); if (!constant_decl_p (var)) continue; - vec *access_vec = get_base_access_vector (var); - if (!access_vec) - continue; - for (unsigned i = 0; i < access_vec->length (); i++) + + struct access *access = get_first_repr_for_decl (var); + + while (access) { - struct access *access = (*access_vec)[i]; - if (!access->replacement_decl) - continue; - gassign *stmt - = gimple_build_assign (get_access_replacement (access), - unshare_expr (access->expr)); - if (dump_file && (dump_flags & TDF_DETAILS)) + if (access->replacement_decl) { - fprintf (dump_file, "Generating constant initializer: "); - print_gimple_stmt (dump_file, stmt, 0); - fprintf (dump_file, "\n"); + gassign *stmt + = gimple_build_assign (get_access_replacement (access), + unshare_expr (access->expr)); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Generating constant initializer: "); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "\n"); + } + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + update_stmt (stmt); + } + + if (access->first_child) + access = access->first_child; + else if (access->next_sibling) + access = access->next_sibling; + else + { + while (access->parent && !access->next_sibling) + access = access->parent; + if (access->next_sibling) + access = access->next_sibling; + else + access = access->next_grp; } - gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); - update_stmt (stmt); } } -- cgit v1.1 From 6693911f069b1ada7c04aa1d00c3653ba694958a Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 29 Jan 2020 13:13:13 +0100 Subject: SRA: Also propagate accesses from LHS to RHS [PR92706] 2020-01-29 Martin Jambor PR tree-optimization/92706 * tree-sra.c (struct access): Fields first_link, last_link, next_queued and grp_queued renamed to first_rhs_link, last_rhs_link, next_rhs_queued and grp_rhs_queued respectively, new fields first_lhs_link, last_lhs_link, next_lhs_queued and grp_lhs_queued. (struct assign_link): Field next renamed to next_rhs, new field next_lhs. Updated comment. (work_queue_head): Renamed to rhs_work_queue_head. (lhs_work_queue_head): New variable. (add_link_to_lhs): New function. (relink_to_new_repr): Also relink LHS lists. (add_access_to_work_queue): Renamed to add_access_to_rhs_work_queue. (add_access_to_lhs_work_queue): New function. (pop_access_from_work_queue): Renamed to pop_access_from_rhs_work_queue. (pop_access_from_lhs_work_queue): New function. (build_accesses_from_assign): Also add links to LHS lists and to LHS work_queue. (child_would_conflict_in_lacc): Renamed to child_would_conflict_in_acc. Adjusted parameter names. (create_artificial_child_access): New parameter set_grp_read, use it. (subtree_mark_written_and_enqueue): Renamed to subtree_mark_written_and_rhs_enqueue. 
(propagate_subaccesses_across_link): Renamed to propagate_subaccesses_from_rhs. (propagate_subaccesses_from_lhs): New function. (propagate_all_subaccesses): Also propagate subaccesses from LHSs to RHSs. testsuite/ * gcc.dg/tree-ssa/pr92706-1.c: New test. --- gcc/tree-sra.c | 306 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 231 insertions(+), 75 deletions(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 2b08498..ea8594d 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -167,11 +167,15 @@ struct access struct access *next_sibling; /* Pointers to the first and last element in the linked list of assign - links. */ - struct assign_link *first_link, *last_link; + links for propagation from RHS to LHS. */ + struct assign_link *first_rhs_link, *last_rhs_link; - /* Pointer to the next access in the work queue. */ - struct access *next_queued; + /* Pointers to the first and last element in the linked list of assign + links for propagation from LHS to RHS. */ + struct assign_link *first_lhs_link, *last_lhs_link; + + /* Pointer to the next access in the work queues. */ + struct access *next_rhs_queued, *next_lhs_queued; /* Replacement variable for this access "region." Never to be accessed directly, always only by the means of get_access_replacement() and only @@ -184,8 +188,11 @@ struct access /* Is this particular access write access? */ unsigned write : 1; - /* Is this access currently in the work queue? */ - unsigned grp_queued : 1; + /* Is this access currently in the rhs work queue? */ + unsigned grp_rhs_queued : 1; + + /* Is this access currently in the lhs work queue? */ + unsigned grp_lhs_queued : 1; /* Does this group contain a write access? This flag is propagated down the access tree. */ @@ -262,12 +269,14 @@ typedef struct access *access_p; static object_allocator<struct access> access_pool ("SRA accesses"); /* A structure linking lhs and rhs accesses from an aggregate assignment. They - are used to propagate subaccesses from rhs to lhs as long as they don't - conflict with what is already there. */ + are used to propagate subaccesses from rhs to lhs and vice versa as long as + they don't conflict with what is already there. In the RHS->LHS direction, + we also propagate grp_write flag to lazily mark that the access contains any + meaningful data. */ struct assign_link { struct access *lacc, *racc; - struct assign_link *next; + struct assign_link *next_rhs, *next_lhs; }; /* Alloc pool for allocating assign link structures. */ @@ -327,7 +336,7 @@ static struct obstack name_obstack; /* Head of a linked list of accesses that need to have its subaccesses propagated to their assignment counterparts. */ -static struct access *work_queue_head; +static struct access *rhs_work_queue_head, *lhs_work_queue_head; /* Dump contents of ACCESS to file F in a human friendly way. If GRP is true, representative fields are dumped, otherwise those which only describe the @@ -534,79 +543,155 @@ get_var_base_offset_size_access (tree base, HOST_WIDE_INT offset, } /* Add LINK to the linked list of assign links of RACC.
*/ + static void add_link_to_rhs (struct access *racc, struct assign_link *link) { gcc_assert (link->racc == racc); - if (!racc->first_link) + if (!racc->first_rhs_link) { - gcc_assert (!racc->last_link); - racc->first_link = link; + gcc_assert (!racc->last_rhs_link); + racc->first_rhs_link = link; } else - racc->last_link->next = link; + racc->last_rhs_link->next_rhs = link; - racc->last_link = link; - link->next = NULL; + racc->last_rhs_link = link; + link->next_rhs = NULL; } -/* Move all link structures in their linked list in OLD_RACC to the linked list - in NEW_RACC. */ +/* Add LINK to the linked list of lhs assign links of LACC. */ + static void -relink_to_new_repr (struct access *new_racc, struct access *old_racc) +add_link_to_lhs (struct access *lacc, struct assign_link *link) { - if (!old_racc->first_link) + gcc_assert (link->lacc == lacc); + + if (!lacc->first_lhs_link) { - gcc_assert (!old_racc->last_link); - return; + gcc_assert (!lacc->last_lhs_link); + lacc->first_lhs_link = link; } + else + lacc->last_lhs_link->next_lhs = link; + + lacc->last_lhs_link = link; + link->next_lhs = NULL; +} - if (new_racc->first_link) +/* Move all link structures in their linked list in OLD_ACC to the linked list + in NEW_ACC. */ +static void +relink_to_new_repr (struct access *new_acc, struct access *old_acc) +{ + if (old_acc->first_rhs_link) { - gcc_assert (!new_racc->last_link->next); - gcc_assert (!old_racc->last_link || !old_racc->last_link->next); - new_racc->last_link->next = old_racc->first_link; - new_racc->last_link = old_racc->last_link; + if (new_acc->first_rhs_link) + { + gcc_assert (!new_acc->last_rhs_link->next_rhs); + gcc_assert (!old_acc->last_rhs_link + || !old_acc->last_rhs_link->next_rhs); + + new_acc->last_rhs_link->next_rhs = old_acc->first_rhs_link; + new_acc->last_rhs_link = old_acc->last_rhs_link; + } + else + { + gcc_assert (!new_acc->last_rhs_link); + + new_acc->first_rhs_link = old_acc->first_rhs_link; + new_acc->last_rhs_link = old_acc->last_rhs_link; + } + old_acc->first_rhs_link = old_acc->last_rhs_link = NULL; } else + gcc_assert (!old_acc->last_rhs_link); + + if (old_acc->first_lhs_link) { - gcc_assert (!new_racc->last_link); - new_racc->first_link = old_racc->first_link; - new_racc->last_link = old_racc->last_link; + if (new_acc->first_lhs_link) + { + gcc_assert (!new_acc->last_lhs_link->next_lhs); + gcc_assert (!old_acc->last_lhs_link + || !old_acc->last_lhs_link->next_lhs); + + new_acc->last_lhs_link->next_lhs = old_acc->first_lhs_link; + new_acc->last_lhs_link = old_acc->last_lhs_link; + } + else + { + gcc_assert (!new_acc->last_lhs_link); + + new_acc->first_lhs_link = old_acc->first_lhs_link; + new_acc->last_lhs_link = old_acc->last_lhs_link; + } + old_acc->first_lhs_link = old_acc->last_lhs_link = NULL; } - old_racc->first_link = old_racc->last_link = NULL; + else + gcc_assert (!old_acc->last_lhs_link); + } -/* Add ACCESS to the work queue (which is actually a stack). */ +/* Add ACCESS to the work queue for propagation of subaccesses from RHS to + LHS (which is actually a stack).
*/ static void -add_access_to_work_queue (struct access *access) +add_access_to_rhs_work_queue (struct access *access) { - if (access->first_link && !access->grp_queued) + if (access->first_rhs_link && !access->grp_rhs_queued) { - gcc_assert (!access->next_queued); - access->next_queued = work_queue_head; - access->grp_queued = 1; - work_queue_head = access; + gcc_assert (!access->next_rhs_queued); + access->next_rhs_queued = rhs_work_queue_head; + access->grp_rhs_queued = 1; + rhs_work_queue_head = access; } } -/* Pop an access from the work queue, and return it, assuming there is one. */ +/* Add ACCESS to the work queue for propagation of subaccesses from LHS to + RHS (which is actually a stack). */ + +static void +add_access_to_lhs_work_queue (struct access *access) +{ + if (access->first_lhs_link && !access->grp_lhs_queued) + { + gcc_assert (!access->next_lhs_queued); + access->next_lhs_queued = lhs_work_queue_head; + access->grp_lhs_queued = 1; + lhs_work_queue_head = access; + } +} + +/* Pop an access from the work queue for propagating from RHS to LHS, and + return it, assuming there is one. */ static struct access * -pop_access_from_work_queue (void) +pop_access_from_rhs_work_queue (void) { - struct access *access = work_queue_head; + struct access *access = rhs_work_queue_head; - work_queue_head = access->next_queued; - access->next_queued = NULL; - access->grp_queued = 0; + rhs_work_queue_head = access->next_rhs_queued; + access->next_rhs_queued = NULL; + access->grp_rhs_queued = 0; return access; } +/* Pop an access from the work queue for propagating from LHS to RHS, and + return it, assuming there is one. */ + +static struct access * +pop_access_from_lhs_work_queue (void) +{ + struct access *access = lhs_work_queue_head; + + lhs_work_queue_head = access->next_lhs_queued; + access->next_lhs_queued = NULL; + access->grp_lhs_queued = 0; + return access; +} /* Allocate necessary structures. */ @@ -1203,7 +1288,9 @@ build_accesses_from_assign (gimple *stmt) link->lacc = lacc; link->racc = racc; add_link_to_rhs (racc, link); - add_access_to_work_queue (racc); + add_link_to_lhs (lacc, link); + add_access_to_rhs_work_queue (racc); + add_access_to_lhs_work_queue (lacc); /* Let's delay marking the areas as written until propagation of accesses across link, unless the nature of rhs tells us that its data comes @@ -2492,17 +2579,17 @@ analyze_access_trees (struct access *access) return ret; } -/* Return true iff a potential new child of LACC at offset OFFSET and with size +/* Return true iff a potential new child of ACC at offset OFFSET and with size SIZE would conflict with an already existing one. If exactly such a child - already exists in LACC, store a pointer to it in EXACT_MATCH. */ + already exists in ACC, store a pointer to it in EXACT_MATCH.
*/ static bool -child_would_conflict_in_lacc (struct access *lacc, HOST_WIDE_INT norm_offset, +child_would_conflict_in_acc (struct access *acc, HOST_WIDE_INT norm_offset, HOST_WIDE_INT size, struct access **exact_match) { struct access *child; - for (child = lacc->first_child; child; child = child->next_sibling) + for (child = acc->first_child; child; child = child->next_sibling) { if (child->offset == norm_offset && child->size == size) { @@ -2528,7 +2615,7 @@ child_would_conflict_in_lacc (struct access *lacc, HOST_WIDE_INT norm_offset, static struct access * create_artificial_child_access (struct access *parent, struct access *model, HOST_WIDE_INT new_offset, - bool set_grp_write) + bool set_grp_read, bool set_grp_write) { struct access **child; tree expr = parent->base; @@ -2551,8 +2638,8 @@ create_artificial_child_access (struct access *parent, struct access *model, access->size = model->size; access->type = model->type; access->parent = parent; + access->grp_read = set_grp_read; access->grp_write = set_grp_write; - access->grp_read = false; access->reverse = model->reverse; child = &parent->first_child; @@ -2571,16 +2658,16 @@ create_artificial_child_access (struct access *parent, struct access *model, and has assignment links leading from it, re-enqueue it. */ static void -subtree_mark_written_and_enqueue (struct access *access) +subtree_mark_written_and_rhs_enqueue (struct access *access) { if (access->grp_write) return; access->grp_write = true; - add_access_to_work_queue (access); + add_access_to_rhs_work_queue (access); struct access *child; for (child = access->first_child; child; child = child->next_sibling) - subtree_mark_written_and_enqueue (child); + subtree_mark_written_and_rhs_enqueue (child); } /* Propagate subaccesses and grp_write flags of RACC across an assignment link @@ -2590,7 +2677,7 @@ subtree_mark_written_and_enqueue (struct access *access) possible. 
*/ static bool -propagate_subaccesses_across_link (struct access *lacc, struct access *racc) +propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc) { struct access *rchild; HOST_WIDE_INT norm_delta = lacc->offset - racc->offset; @@ -2603,7 +2690,7 @@ gcc_checking_assert (!comes_initialized_p (racc->base)); if (racc->grp_write) { - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); ret = true; } } @@ -2615,7 +2702,7 @@ if (!lacc->grp_write) { ret = true; - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); } return ret; } @@ -2625,7 +2712,7 @@ if (!lacc->grp_write) { ret = true; - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); } if (!lacc->first_child && !racc->first_child) { @@ -2655,7 +2742,7 @@ struct access *new_acc = NULL; HOST_WIDE_INT norm_offset = rchild->offset + norm_delta; - if (child_would_conflict_in_lacc (lacc, norm_offset, rchild->size, + if (child_would_conflict_in_acc (lacc, norm_offset, rchild->size, &new_acc)) { if (new_acc) { if (!new_acc->grp_write && rchild->grp_write) { gcc_assert (!lacc->grp_write); - subtree_mark_written_and_enqueue (new_acc); + subtree_mark_written_and_rhs_enqueue (new_acc); ret = true; } rchild->grp_hint = 1; new_acc->grp_hint |= new_acc->grp_read; if (rchild->first_child - && propagate_subaccesses_across_link (new_acc, rchild)) + && propagate_subaccesses_from_rhs (new_acc, rchild)) { ret = 1; - add_access_to_work_queue (new_acc); + add_access_to_rhs_work_queue (new_acc); } } else { if (!lacc->grp_write) { ret = true; - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); } } continue; } @@ -2692,41 +2779,85 @@ if (rchild->grp_write && !lacc->grp_write) { ret = true; - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); } continue; } rchild->grp_hint = 1; new_acc = create_artificial_child_access (lacc, rchild, norm_offset, - lacc->grp_write - || rchild->grp_write); + false, (lacc->grp_write + || rchild->grp_write)); gcc_checking_assert (new_acc); if (racc->first_child) - propagate_subaccesses_across_link (new_acc, rchild); + propagate_subaccesses_from_rhs (new_acc, rchild); - add_access_to_work_queue (lacc); + add_access_to_rhs_work_queue (lacc); ret = true; } return ret; } +/* Propagate subaccesses of LACC across an assignment link to RACC if they + should inhibit total scalarization of the corresponding area. No flags are + being propagated in the process. Return true if anything changed. */ + +static bool +propagate_subaccesses_from_lhs (struct access *lacc, struct access *racc) +{ + if (is_gimple_reg_type (racc->type) + || lacc->grp_unscalarizable_region + || racc->grp_unscalarizable_region) + return false; + + /* TODO: Do we want to set some new racc flag to stop potential total + scalarization if lacc is a scalar access (and none of the two have + children)?
*/ + + bool ret = false; + HOST_WIDE_INT norm_delta = racc->offset - lacc->offset; + for (struct access *lchild = lacc->first_child; + lchild; + lchild = lchild->next_sibling) + { + struct access *matching_acc = NULL; + HOST_WIDE_INT norm_offset = lchild->offset + norm_delta; + + if (lchild->grp_unscalarizable_region + || child_would_conflict_in_acc (racc, norm_offset, lchild->size, + &matching_acc)) + { + if (matching_acc + && propagate_subaccesses_from_lhs (lchild, matching_acc)) + add_access_to_lhs_work_queue (matching_acc); + continue; + } + + struct access *new_acc + = create_artificial_child_access (racc, lchild, norm_offset, + true, false); + propagate_subaccesses_from_lhs (lchild, new_acc); + ret = true; + } + return ret; +} + /* Propagate all subaccesses across assignment links. */ static void propagate_all_subaccesses (void) { - while (work_queue_head) + while (rhs_work_queue_head) { - struct access *racc = pop_access_from_work_queue (); + struct access *racc = pop_access_from_rhs_work_queue (); struct assign_link *link; if (racc->group_representative) racc= racc->group_representative; - gcc_assert (racc->first_link); + gcc_assert (racc->first_rhs_link); - for (link = racc->first_link; link; link = link->next) + for (link = racc->first_rhs_link; link; link = link->next_rhs) { struct access *lacc = link->lacc; @@ -2739,22 +2870,47 @@ propagate_all_subaccesses (void) { if (!lacc->grp_write) { - subtree_mark_written_and_enqueue (lacc); + subtree_mark_written_and_rhs_enqueue (lacc); reque_parents = true; } } - else if (propagate_subaccesses_across_link (lacc, racc)) + else if (propagate_subaccesses_from_rhs (lacc, racc)) reque_parents = true; if (reque_parents) do { - add_access_to_work_queue (lacc); + add_access_to_rhs_work_queue (lacc); lacc = lacc->parent; } while (lacc); } } + + while (lhs_work_queue_head) + { + struct access *lacc = pop_access_from_lhs_work_queue (); + struct assign_link *link; + + if (lacc->group_representative) + lacc = lacc->group_representative; + gcc_assert (lacc->first_lhs_link); + + if (!bitmap_bit_p (candidate_bitmap, DECL_UID (lacc->base))) + continue; + + for (link = lacc->first_lhs_link; link; link = link->next_lhs) + { + struct access *racc = link->racc; + + if (racc->group_representative) + racc = racc->group_representative; + if (!bitmap_bit_p (candidate_bitmap, DECL_UID (racc->base))) + continue; + if (propagate_subaccesses_from_lhs (lacc, racc)) + add_access_to_lhs_work_queue (racc); + } + } } /* Return true if the forest beginning with ROOT does not contain -- cgit v1.1 From 9714f1a70d184fb6d282ac543c57734ed1fb39ac Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 11 Feb 2020 10:52:31 +0100 Subject: tree-optimization/93661 properly guard tree_to_poly_int64 2020-02-11 Richard Biener PR tree-optimization/93661 PR tree-optimization/93662 * tree-ssa-sccvn.c (vn_reference_lookup_3): Properly guard tree_to_poly_int64. * tree-sra.c (get_access_for_expr): Likewise. * gcc.dg/pr93661.c: New testcase. 
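For context, tree_to_poly_int64 asserts that its argument really is a compile-time (poly-)constant, so calling it on e.g. the DECL_SIZE of a variable-sized object ICEs, whereas poly_int_tree_p performs the same test but reports failure instead. A minimal sketch of the guarded idiom, using the GCC tree API the way the hunk below does (an illustration with a made-up helper name, not the committed code):

    /* Sketch: fetch the size of BASE in bits into *PSZ, returning false
       instead of asserting when that size is not a (poly-)constant.  */
    static bool
    base_size_known_p (tree base, poly_int64 *psz)
    {
      tree basesize = DECL_SIZE (base);  /* may be NULL or non-constant  */
      return basesize && poly_int_tree_p (basesize, psz);
    }
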
--- gcc/tree-sra.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index ea8594d..f03ad3a 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -3605,8 +3605,10 @@ get_access_for_expr (tree expr) if (tree basesize = DECL_SIZE (base)) { - poly_int64 sz = tree_to_poly_int64 (basesize); - if (offset < 0 || known_le (sz, offset)) + poly_int64 sz; + if (offset < 0 + || !poly_int_tree_p (basesize, &sz) + || known_le (sz, offset)) return NULL; } -- cgit v1.1 From 515dd04260c6049110d7624eaf1b276929dcd9af Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 14 Feb 2020 15:02:35 +0100 Subject: sra: Avoid verification failure (PR 93516) get_ref_base_and_extent can return different sizes for COMPONENT_REFs and DECLs of the same type, with the latter including (more?) padding. When in the IL there is an assignment between such a COMPONENT_REF and a DECL, SRA will try to propagate the access from the former as a child of the latter, creating an artificial reference that does not match the access's declared size, which triggers a verifier assert. Fixed by teaching the propagation functions about this special situation so that they don't do it. The condition is the same that build_user_friendly_ref_for_offset uses so the artificial reference causing the verifier failure is guaranteed not to be created. 2020-02-14 Martin Jambor PR tree-optimization/93516 * tree-sra.c (propagate_subaccesses_from_rhs): Do not create access of the same type as the parent. (propagate_subaccesses_from_lhs): Likewise. gcc/testsuite/ * g++.dg/tree-ssa/pr93516.C: New test. --- gcc/tree-sra.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index f03ad3a..0cfac0a 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -2785,9 +2785,17 @@ propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc) } rchild->grp_hint = 1; - new_acc = create_artificial_child_access (lacc, rchild, norm_offset, - false, (lacc->grp_write - || rchild->grp_write)); + /* Because get_ref_base_and_extent always includes padding in size for + accesses to DECLs but not necessarily for COMPONENT_REFs of the same + type, we might actually be attempting here to create a child of the + same type as the parent. */ + if (!types_compatible_p (lacc->type, rchild->type)) + new_acc = create_artificial_child_access (lacc, rchild, norm_offset, + false, + (lacc->grp_write + || rchild->grp_write)); + else + new_acc = lacc; gcc_checking_assert (new_acc); if (racc->first_child) propagate_subaccesses_from_rhs (new_acc, rchild); @@ -2834,10 +2842,19 @@ propagate_subaccesses_from_lhs (struct access *lacc, struct access *racc) continue; } - struct access *new_acc - = create_artificial_child_access (racc, lchild, norm_offset, - true, false); - propagate_subaccesses_from_lhs (lchild, new_acc); + /* Because get_ref_base_and_extent always includes padding in size for + accesses to DECLs but not necessarily for COMPONENT_REFs of the same + type, we might actually be attempting here to create a child of the + same type as the parent.
*/ if (!types_compatible_p (racc->type, lchild->type)) { struct access *new_acc = create_artificial_child_access (racc, lchild, norm_offset, true, false); propagate_subaccesses_from_lhs (lchild, new_acc); } else + propagate_subaccesses_from_lhs (lchild, racc); ret = true; } return ret; -- cgit v1.1 From 665c5bad168ab63629b29ed2ce08ed042c088dc2 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 19 Feb 2020 11:08:40 +0100 Subject: sra: Avoid totally scalarizing overlapping field_decls (PR 93667) [[no_unique_address]] C++ attribute can cause two fields of a RECORD_TYPE to overlap, which currently confuses the totally scalarizing code into creating invalid access tree. For GCC 10, I'd like to simply disable total scalarization of types where this happens. For GCC 11 I'll write down a TODO item to enable total scalarization of cases like this where the problematic fields are basically empty - despite having a non-zero size - i.e. when they are just RECORD_TYPEs without any data fields. 2020-02-19 Martin Jambor gcc/ PR tree-optimization/93667 * tree-sra.c (scalarizable_type_p): Return false if record fields do not follow each other. gcc/testsuite/ PR tree-optimization/93667 * g++.dg/tree-ssa/pr93667.C: New test. --- gcc/tree-sra.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 0cfac0a..4c7d651 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -958,6 +958,9 @@ scalarizable_type_p (tree type, bool const_decl) if (type_contains_placeholder_p (type)) return false; + bool have_predecessor_field = false; + HOST_WIDE_INT prev_pos = 0; + switch (TREE_CODE (type)) { case RECORD_TYPE: @@ -966,6 +969,17 @@ scalarizable_type_p (tree type, bool const_decl) { tree ft = TREE_TYPE (fld); + if (zerop (DECL_SIZE (fld))) + continue; + + HOST_WIDE_INT pos = int_bit_position (fld); + if (have_predecessor_field + && pos <= prev_pos) + return false; + + have_predecessor_field = true; + prev_pos = pos; + if (DECL_BIT_FIELD (fld)) return false; -- cgit v1.1 From 51faf07cef9293af544bfacc7d0b320ab90d7d60 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 19 Feb 2020 11:13:52 +0100 Subject: sra: Do not create zero sized accesses (PR 93776) SRA can get a bit confused with zero-sized accesses like the one in the testcase. Since there is nothing in the access, nothing is scalarized, but we can get the order of the structures wrong, which the verifier is not happy about. Fixed by simply ignoring such accesses. 2020-02-19 Martin Jambor PR tree-optimization/93776 * tree-sra.c (create_access): Do not create zero size accesses. (get_access_for_expr): Do not search for zero sized accesses. testsuite/ * gcc.dg/tree-ssa/pr93776.c: New test.
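To make the failure mode concrete, a zero-sized access can arise from copying a member whose type occupies no storage, e.g. an empty structure (zero-sized in GNU C). A hedged sketch of the kind of input involved — a guess at the shape of the problem, not the actual gcc.dg/tree-ssa/pr93776.c testcase:

    /* The copy of E is recorded as an access of size 0; such accesses carry
       no data but could end up mis-ordered in the access tree, tripping the
       verifier added earlier in this series.  */
    struct empty {};                    /* zero-sized, a GNU C extension  */
    struct s { struct empty e; int i; };

    int
    f (struct s *p)
    {
      struct s tmp = *p;                /* aggregate copy, SRA candidate  */
      struct empty e2 = tmp.e;          /* zero-sized load  */
      (void) e2;
      return tmp.i;
    }
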
--- gcc/tree-sra.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 4c7d651..49f9001 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -926,6 +926,8 @@ create_access (tree expr, gimple *stmt, bool write) size = max_size; unscalarizable_region = true; } + if (size == 0) + return NULL; if (size < 0) { disqualify_candidate (base, "Encountered an unconstrained access."); @@ -3643,7 +3645,8 @@ get_access_for_expr (tree expr) return NULL; } - if (!bitmap_bit_p (candidate_bitmap, DECL_UID (base))) + if (max_size == 0 + || !bitmap_bit_p (candidate_bitmap, DECL_UID (base))) return NULL; return get_var_base_offset_size_access (base, offset, max_size); -- cgit v1.1 From 4d6bf96b583d77336cf6ca643d92d068a88414fa Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 21 Feb 2020 13:38:22 +0100 Subject: sra: Only verify sizes of scalar accesses (PR 93845) The testcase is another example - in addition to recent PR 93516 - where the SRA access verifier is confused by the fact that get_ref_base_and_extent can return different sizes for the same type, depending on whether they are COMPONENT_REFs or not. In the previous bug I decided to keep the verifier check for aggregate type even though it is not really important and instead avoid the easily detectable type-within-the-same-type situation. This testcase is however a result of a fairly random looking type cast and so cannot be handled in the same way. Because the check is not really important for aggregates, this patch simply disables it for non-register types. 2020-02-21 Martin Jambor PR tree-optimization/93845 * tree-sra.c (verify_sra_access_forest): Only test access size of scalar types. testsuite/ * g++.dg/tree-ssa/pr93845.C: New test. --- gcc/tree-sra.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc/tree-sra.c') diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 49f9001..5561ea6 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -2355,7 +2355,8 @@ verify_sra_access_forest (struct access *root) gcc_assert (offset == access->offset); gcc_assert (access->grp_unscalarizable_region || size == max_size); - gcc_assert (max_size == access->size); + gcc_assert (!is_gimple_reg_type (access->type) + || max_size == access->size); gcc_assert (reverse == access->reverse); -- cgit v1.1 From 700d4cb08c88aec37c13e21e63dd61fd698baabc Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 17 Mar 2020 13:52:19 +0100 Subject: Fix up duplicated duplicated words mostly in comments In the r10-7197-gbae7b38cf8a21e068ad5c0bab089dedb78af3346 commit I've noticed a duplicated word in a message, which led me to grep for those and we have tons of them. I've used grep -v 'long long\|optab optab\|template template\|double double' *.[chS] */*.[chS] *.def config/*/* 2>/dev/null | grep ' \([a-zA-Z]\+\) \1 ' Note, the command will not detect the doubled words at the start or end of line or when one of the words is at the end of line and the next one at the start of another one. Some of it is fairly obvious, e.g. all the "the the" cases which is something I've posted and committed a patch for already e.g. in 2016, other cases are often valid, e.g. "that that" seems to look mostly ok to me. Some cases are quite hard to figure out, I've left out some of them from the patch (e.g. "and and" in some cases isn't talking about bitwise/logical and and so looks incorrect, but in other cases it is talking about those operations).
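Because the quoted grep only matches doublings that fall entirely within one line, catching the cross-line pairs needs a scanner that remembers the previous word. A throwaway sketch in C — a hypothetical helper, not part of the commit — that reads a file on stdin; intentional doublings ("long long", "that that", ...) still have to be filtered by hand, as noted above:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      char prev[64] = "", cur[64];
      int c, len = 0, line = 1;
      while ((c = getchar ()) != EOF)
        {
          if (isalpha (c))
            {
              if (len < 63)
                cur[len++] = tolower (c);   /* accumulate current word  */
            }
          else
            {
              cur[len] = '\0';
              if (len && !strcmp (cur, prev))   /* doubled, even across \n  */
                printf ("%d: doubled word '%s'\n", line, cur);
              if (len)
                strcpy (prev, cur);
              len = 0;
              if (c == '\n')
                line++;
            }
        }
      return 0;
    }
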
In most cases the right solution seems to be to remove one of the
duplicated words, but not always.  I think the most important ones are
those in user-visible messages (in the patch, 3 of the first 4 hunks);
the rest is just comments (and internal documentation; for that see the
doc/tm.texi changes).

2020-03-17  Jakub Jelinek

	* lra-spills.c (remove_pseudos): Fix up duplicated word issue in
	a dump message.
	* tree-sra.c (create_access_replacement): Fix up duplicated word
	issue in a comment.
	* read-rtl-function.c (find_param_by_name,
	function_reader::parse_enum_value,
	function_reader::get_insn_by_uid): Likewise.
	* spellcheck.c (get_edit_distance_cutoff): Likewise.
	* tree-data-ref.c (create_ifn_alias_checks): Likewise.
	* tree.def (SWITCH_EXPR): Likewise.
	* selftest.c (assert_str_contains): Likewise.
	* ipa-param-manipulation.h (class ipa_param_body_adjustments):
	Likewise.
	* tree-ssa-math-opts.c (convert_expand_mult_copysign): Likewise.
	* tree-ssa-loop-split.c (find_vdef_in_loop): Likewise.
	* langhooks.h (struct lang_hooks_for_decls): Likewise.
	* ipa-prop.h (struct ipa_param_descriptor): Likewise.
	* tree-ssa-strlen.c (handle_builtin_string_cmp, handle_store):
	Likewise.
	* tree-ssa-dom.c (simplify_stmt_for_jump_threading): Likewise.
	* tree-ssa-reassoc.c (reassociate_bb): Likewise.
	* tree.c (component_ref_size): Likewise.
	* hsa-common.c (hsa_init_compilation_unit_data): Likewise.
	* gimple-ssa-sprintf.c (get_string_length, format_string,
	format_directive): Likewise.
	* omp-grid.c (grid_process_kernel_body_copy): Likewise.
	* input.c (string_concat_db::get_string_concatenation,
	test_lexer_string_locations_ucn4): Likewise.
	* cfgexpand.c (pass_expand::execute): Likewise.
	* gimple-ssa-warn-restrict.c
	(builtin_memref::offset_out_of_bounds, maybe_diag_overlap):
	Likewise.
	* rtl.c (RTX_CODE_HWINT_P_1): Likewise.
	* shrink-wrap.c (spread_components): Likewise.
	* tree-ssa-dse.c (initialize_ao_ref_for_dse, valid_ao_ref_for_dse):
	Likewise.
	* tree-call-cdce.c (shrink_wrap_one_built_in_call_with_conds):
	Likewise.
	* dwarf2out.c (dwarf2out_early_finish): Likewise.
	* gimple-ssa-store-merging.c: Likewise.
	* ira-costs.c (record_operand_costs): Likewise.
	* tree-vect-loop.c (vectorizable_reduction): Likewise.
	* target.def (dispatch): Likewise.
	(validate_dims, gen_ccmp_first): Fix up duplicated word issue in
	documentation text.
	* doc/tm.texi: Regenerated.
	* config/i386/x86-tune.def (X86_TUNE_PARTIAL_FLAG_REG_STALL): Fix
	up duplicated word issue in a comment.
	* config/i386/i386.c (ix86_test_loading_unspec): Likewise.
	* config/i386/i386-features.c (remove_partial_avx_dependency):
	Likewise.
	* config/msp430/msp430.c (msp430_select_section): Likewise.
	* config/gcn/gcn-run.c (load_image): Likewise.
	* config/aarch64/aarch64-sve.md (sve_ld1r): Likewise.
	* config/aarch64/aarch64.c (aarch64_gen_adjusted_ldpstp): Likewise.
	* config/aarch64/falkor-tag-collision-avoidance.c
	(single_dest_per_chain): Likewise.
	* config/nvptx/nvptx.c (nvptx_record_fndecl): Likewise.
	* config/fr30/fr30.c (fr30_arg_partial_bytes): Likewise.
	* config/rs6000/rs6000-string.c (expand_cmp_vec_sequence):
	Likewise.
	* config/rs6000/rs6000-p8swap.c (replace_swapped_load_constant):
	Likewise.
	* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Likewise.
	* config/rs6000/rs6000.c (rs6000_option_override_internal):
	Likewise.
	* config/rs6000/rs6000-logue.c
	(rs6000_emit_probe_stack_range_stack_clash): Likewise.
	* config/nds32/nds32-md-auxiliary.c (nds32_split_ashiftdi3):
	Likewise.  Fix various other issues in the comment.
c-family/
	* c-common.c (resolve_overloaded_builtin): Fix up duplicated word
	issue in a diagnostic message.

cp/
	* pt.c (tsubst): Fix up duplicated word issue in a diagnostic
	message.
	(lookup_template_class_1, tsubst_expr): Fix up duplicated word
	issue in a comment.
	* parser.c (cp_parser_statement, cp_parser_linkage_specification,
	cp_parser_placeholder_type_specifier,
	cp_parser_constraint_requires_parens): Likewise.
	* name-lookup.c (suggest_alternative_in_explicit_scope): Likewise.

fortran/
	* array.c (gfc_check_iter_variable): Fix up duplicated word issue
	in a comment.
	* arith.c (gfc_arith_concat): Likewise.
	* resolve.c (gfc_resolve_ref): Likewise.
	* frontend-passes.c (matmul_lhs_realloc): Likewise.
	* module.c (gfc_match_submodule, load_needed): Likewise.
	* trans-expr.c (gfc_init_se): Likewise.
---
 gcc/tree-sra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 5561ea6..afff0ec 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2142,7 +2142,7 @@ sort_and_splice_var_accesses (tree var)
 /* Create a variable for the given ACCESS which determines the type, name
    and a few other properties.  Return the variable declaration and store
    it also to ACCESS->replacement.  REG_TREE is used when creating a
-   default-definition SSA name on on in order to facilitate an uninitialized
+   default-definition SSA name on in order to facilitate an uninitialized
    warning.  It is used instead of the actual ACCESS type if that is not of a
    gimple register type.  */
--
cgit v1.1

From 29f23ed79b60949fc60f6fdbbd931bd58090b241 Mon Sep 17 00:00:00 2001
From: Martin Jambor
Date: Sat, 21 Mar 2020 00:21:02 +0100
Subject: sra: Cap number of sub-access propagations with a param (PR 93435)

PR 93435 is a perfect SRA bomb.  It initializes an array of 16 chars
element-wise, then uses that array to initialize an aggregate consisting
of four such arrays, uses that one to initialize an aggregate four times
as big as the previous one, and so on, all the way up to an aggregate of
64kb.  This causes the sub-access propagation across assignments to
create thousands of byte-sized artificial accesses which are then
eligible to be replaced - they do facilitate forward propagation, but
there are enough of them for DSE to never finish.

This patch avoids that situation by accounting how many such
replacements can be created per SRA candidate.  The default value of 32
was just the largest power of two that did not slow down compilation of
the testcase, but it should also hopefully be big enough for any
reasonable input that might rely on the optimization.

2020-03-20  Martin Jambor

	PR tree-optimization/93435
	* params.opt (sra-max-propagations): New parameter.
	* tree-sra.c (propagation_budget): New variable.
	(budget_for_propagation_access): New function.
	(propagate_subaccesses_from_rhs): Use it.
	(propagate_subaccesses_from_lhs): Likewise.
	(propagate_all_subaccesses): Set up and destroy propagation_budget.

gcc/testsuite/
	* gcc.dg/tree-ssa/pr93435.c: New test.
---
 gcc/tree-sra.c | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index afff0ec..b2056b5 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -285,6 +285,9 @@ static object_allocator<assign_link> assign_link_pool ("SRA links");
 /* Base (tree) -> Vector (vec<access_p> *) map.
    */
 static hash_map<tree, auto_vec<access_p> > *base_access_vec;

+/* Hash to limit creation of artificial accesses.  */
+static hash_map<tree, unsigned> *propagation_budget;
+
 /* Candidate hash table helpers.  */

 struct uid_decl_hasher : nofree_ptr_hash <tree_node>
@@ -2687,6 +2690,32 @@ subtree_mark_written_and_rhs_enqueue (struct access *access)
     subtree_mark_written_and_rhs_enqueue (child);
 }

+/* If there is still budget to create a propagation access for DECL, return
+   true and decrement the budget.  Otherwise return false.  */
+
+static bool
+budget_for_propagation_access (tree decl)
+{
+  unsigned b, *p = propagation_budget->get (decl);
+  if (p)
+    b = *p;
+  else
+    b = param_sra_max_propagations;
+
+  if (b == 0)
+    return false;
+  b--;
+
+  if (b == 0 && dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "The propagation budget of ");
+      print_generic_expr (dump_file, decl);
+      fprintf (dump_file, " (UID: %u) has been exhausted.\n", DECL_UID (decl));
+    }
+  propagation_budget->put (decl, b);
+  return true;
+}
+
 /* Propagate subaccesses and grp_write flags of RACC across an assignment
    link to LACC.  Enqueue sub-accesses as necessary so that the write flag
    is propagated transitively.  Return true if anything changed.
    Additionally, if
@@ -2791,7 +2820,8 @@ propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc)
	      continue;
	    }

-	  if (rchild->grp_unscalarizable_region)
+	  if (rchild->grp_unscalarizable_region
+	      || !budget_for_propagation_access (lacc->base))
	    {
	      if (rchild->grp_write && !lacc->grp_write)
		{
@@ -2851,7 +2881,8 @@ propagate_subaccesses_from_lhs (struct access *lacc, struct access *racc)
       if (lchild->grp_unscalarizable_region
	  || child_would_conflict_in_acc (racc, norm_offset, lchild->size,
-					  &matching_acc))
+					  &matching_acc)
+	  || !budget_for_propagation_access (racc->base))
	{
	  if (matching_acc
	      && propagate_subaccesses_from_lhs (lchild, matching_acc))
@@ -2882,6 +2913,7 @@
 static void
 propagate_all_subaccesses (void)
 {
+  propagation_budget = new hash_map<tree, unsigned>;
   while (rhs_work_queue_head)
     {
       struct access *racc = pop_access_from_rhs_work_queue ();
@@ -2945,6 +2977,7 @@ propagate_all_subaccesses (void)
	  add_access_to_lhs_work_queue (racc);
	}
     }
+  delete propagation_budget;
 }
--
cgit v1.1

From 2111d5406a4ec56d6335bde779a995914d0a36d1 Mon Sep 17 00:00:00 2001
From: Martin Jambor
Date: Thu, 9 Apr 2020 14:37:21 +0200
Subject: sra: Fix sra_modify_expr handling of partial writes (PR 94482)

When sra_modify_expr is invoked on an expression that modifies only part
of the underlying replacement, such as a BIT_FIELD_REF on the LHS of an
assignment, and the SRA replacement's type is not compatible with what
is being replaced (the 0th operand of the B_F_R in the above example),
it does not work properly, basically throwing away the part of the expr
that should have stayed intact.

This is fixed in two ways.  For BIT_FIELD_REFs, which operate on the
binary image of the replacement (and so in a way serve as a
VIEW_CONVERT_EXPR), we just do not bother with converting.  For
REALPART_EXPRs and IMAGPART_EXPRs, if the replacement is not a register,
we insert a VIEW_CONVERT_EXPR under the complex partial access
expression, which is always OK; for loads from registers we take the
extra step of converting the value through a temporary.

This revealed a bug in fwprop which is fixed with the hunk from Richi.
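Schematically, what the new load path does can be sketched as follows.
This is an illustrative, C-like rendering of the GIMPLE that the hunk
below emits, not actual compiler output; the names mirror the variables
in the patch:

  /* Load of __real__/__imag__ through a replacement REPL whose type is
     not compatible with the accessed complex type TYPE.  If REPL is a
     gimple register, the whole value is first view-converted into a
     fresh temporary of TYPE, so the other half is preserved:  */
  tmp = VIEW_CONVERT_EXPR <TYPE> (repl);  /* inserted before the stmt */
  *expr = tmp;   /* REALPART_EXPR / IMAGPART_EXPR now applies to tmp */

  /* If REPL is not a register (access->grp_partial_lhs), the
     VIEW_CONVERT_EXPR itself is left under the REALPART_EXPR or
     IMAGPART_EXPR, which is always valid there.  */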
The testcase for handling REALPART_EXPR and IMAGPART_EXPR is a bit
fragile because SRA prefers complex and vector types over anything else
(and in between the two it decides based on TYPE_UID, which in my
testing today always preferred complex types), so I only run it at -O1
(which is the only level where the test fails for me).

Bootstrapped and tested on x86_64-linux, i686-linux and aarch64-linux.

2020-04-09  Martin Jambor
	    Richard Biener

	PR tree-optimization/94482
	* tree-sra.c (create_access_replacement): Dump new replacement
	with TDF_UID.
	(sra_modify_expr): Fix handling of cases when the original EXPR
	writes to only part of the replacement.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Properly verify
	the first operand of combinations into REAL/IMAGPART_EXPR and
	BIT_FIELD_REF.

testsuite/
	* gcc.dg/torture/pr94482.c: New test.
	* gcc.dg/tree-ssa/pr94482-2.c: Likewise.
---
 gcc/tree-sra.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index b2056b5..84c113c 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2257,7 +2257,7 @@ create_access_replacement (struct access *access, tree reg_type = NULL_TREE)
	  print_generic_expr (dump_file, access->base);
	  fprintf (dump_file, " offset: %u, size: %u: ",
		   (unsigned) access->offset, (unsigned) access->size);
-	  print_generic_expr (dump_file, repl);
+	  print_generic_expr (dump_file, repl, TDF_UID);
	  fprintf (dump_file, "\n");
	}
     }
@@ -3698,6 +3698,7 @@ sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write)
   location_t loc;
   struct access *access;
   tree type, bfr, orig_expr;
+  bool partial_cplx_access = false;

   if (TREE_CODE (*expr) == BIT_FIELD_REF)
     {
@@ -3708,7 +3709,10 @@ sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write)
     bfr = NULL_TREE;

   if (TREE_CODE (*expr) == REALPART_EXPR || TREE_CODE (*expr) == IMAGPART_EXPR)
-    expr = &TREE_OPERAND (*expr, 0);
+    {
+      expr = &TREE_OPERAND (*expr, 0);
+      partial_cplx_access = true;
+    }
   access = get_access_for_expr (*expr);
   if (!access)
     return false;
@@ -3736,13 +3740,32 @@ sra_modify_expr (tree *expr, gimple_stmt_iterator *gsi, bool write)
      be accessed as a different type too, potentially creating a need for
      type conversion (see PR42196) and when scalarized unions are involved
      in assembler statements (see PR42398).  */
-  if (!useless_type_conversion_p (type, access->type))
+  if (!bfr && !useless_type_conversion_p (type, access->type))
     {
       tree ref;

       ref = build_ref_for_model (loc, orig_expr, 0, access, gsi, false);

-      if (write)
+      if (partial_cplx_access)
+	{
+	  /* VIEW_CONVERT_EXPRs in partial complex access are always fine in
+	     the case of a write because in such case the replacement cannot
+	     be a gimple register.  In the case of a load, we have to
+	     differentiate between a register and a non-register
+	     replacement.  */
+	  tree t = build1 (VIEW_CONVERT_EXPR, type, repl);
+	  gcc_checking_assert (!write || access->grp_partial_lhs);
+	  if (!access->grp_partial_lhs)
+	    {
+	      tree tmp = make_ssa_name (type);
+	      gassign *stmt = gimple_build_assign (tmp, t);
+	      /* This is always a read.
	       */
+	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+	      t = tmp;
+	    }
+	  *expr = t;
+	}
+      else if (write)
	{
	  gassign *stmt;
--
cgit v1.1

From bd87b1fddbbe7d424671ebf81c96e12d748fafc7 Mon Sep 17 00:00:00 2001
From: Martin Jambor
Date: Thu, 16 Apr 2020 11:04:41 +0200
Subject: sra: Fix access verification (PR 94598)

get_ref_base_and_extent recognizes ARRAY_REFs with a variable index but
into arrays of length one as constant offset accesses.  However,
max_size in such cases is extended to span the whole element.  This
confuses SRA verification when SRA also builds its (total
scalarization) access structures to describe fields under such an array
- get_ref_base_and_extent returns different size and max_size for them.

Fixed by not performing the check for total scalarization accesses.
The subsequent check then had to be changed to use size and not
max_size too, which meant it had to be skipped when the access
structure describes a genuine variable array access.

Bootstrapped and tested on x86_64-linux.

2020-04-16  Martin Jambor

	PR tree-optimization/94598
	* tree-sra.c (verify_sra_access_forest): Fix verification of total
	scalarization accesses under access to one-element arrays.

testsuite/
	* gcc.dg/tree-ssa/pr94598.c: New test.
---
 gcc/tree-sra.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 84c113c..227bde0 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2357,9 +2357,11 @@ verify_sra_access_forest (struct access *root)
       gcc_assert (base == first_base);
       gcc_assert (offset == access->offset);
       gcc_assert (access->grp_unscalarizable_region
+		  || access->grp_total_scalarization
		  || size == max_size);
-      gcc_assert (!is_gimple_reg_type (access->type)
-		  || max_size == access->size);
+      gcc_assert (access->grp_unscalarizable_region
+		  || !is_gimple_reg_type (access->type)
+		  || size == access->size);
       gcc_assert (reverse == access->reverse);

       if (access->first_child)
--
cgit v1.1

From eb72dc663e9070b281be83a80f6f838a3a878822 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Wed, 22 Apr 2020 10:40:51 +0200
Subject: extend DECL_GIMPLE_REG_P to all types

This extends DECL_GIMPLE_REG_P to all types so we can clear
TREE_ADDRESSABLE even for integers with partial defs, not just complex
and vector variables.  To make that transition easier the patch inverts
DECL_GIMPLE_REG_P to DECL_NOT_GIMPLE_REG_P, since that makes the default
match the current state for all types other than complex and vector
ones.

For the testcase in PR94703 we're able to expand the partially def'ed
local integer to a register then, producing a single movl rather than
going through the stack.

On i?86 this makes gcc.dg/torture/pr71522.c FAIL at execution because
we now expand a round-trip through a long double automatic variable to
a register fld/fst, which normalizes the value.  For that, during RTL
expansion we're looking for problematic punnings of decls and avoid
pseudos for those - I chose integer or BLKmode accesses on decls with
modes where precision doesn't match bitsize, which covers the XFmode
case.

2020-05-07  Richard Biener

	PR middle-end/94703
	* tree-core.h (tree_decl_common::gimple_reg_flag): Rename ...
	(tree_decl_common::not_gimple_reg_flag): ... to this.
	* tree.h (DECL_GIMPLE_REG_P): Rename ...
	(DECL_NOT_GIMPLE_REG_P): ... to this.
	* gimple-expr.c (copy_var_decl): Copy DECL_NOT_GIMPLE_REG_P.
	(create_tmp_reg): Simplify.
	(create_tmp_reg_fn): Likewise.
	(is_gimple_reg): Check DECL_NOT_GIMPLE_REG_P for all regs.
	* gimplify.c (create_tmp_from_val): Simplify.
	(gimplify_bind_expr): Likewise.
	(gimplify_compound_literal_expr): Likewise.
	(gimplify_function_tree): Likewise.
	(prepare_gimple_addressable): Set DECL_NOT_GIMPLE_REG_P.
	* asan.c (create_odr_indicator): Do not clear DECL_GIMPLE_REG_P.
	(asan_add_global): Copy it.
	* cgraphunit.c (cgraph_node::expand_thunk): Force args to be
	GIMPLE regs.
	* function.c (gimplify_parameters): Copy DECL_NOT_GIMPLE_REG_P.
	* ipa-param-manipulation.c
	(ipa_param_body_adjustments::common_initialization): Simplify.
	(ipa_param_body_adjustments::reset_debug_stmts): Copy
	DECL_NOT_GIMPLE_REG_P.
	* omp-low.c (lower_omp_for_scan): Do not set DECL_GIMPLE_REG_P.
	* sanopt.c (sanitize_rewrite_addressable_params): Likewise.
	* tree-cfg.c (make_blocks_1): Simplify.
	(verify_address): Do not verify DECL_GIMPLE_REG_P setting.
	* tree-eh.c (lower_eh_constructs_2): Simplify.
	* tree-inline.c (declare_return_variable): Adjust and generalize.
	(copy_decl_to_var): Copy DECL_NOT_GIMPLE_REG_P.
	(copy_result_decl_to_var): Likewise.
	* tree-into-ssa.c (pass_build_ssa::execute): Adjust comment.
	* tree-nested.c (create_tmp_var_for): Simplify.
	* tree-parloops.c (separate_decls_in_region_name): Copy
	DECL_NOT_GIMPLE_REG_P.
	* tree-sra.c (create_access_replacement): Adjust and generalize
	partial def support.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Set
	DECL_NOT_GIMPLE_REG_P on decls we introduce partial defs on.
	* tree-ssa.c (maybe_optimize_var): Handle clearing of
	TREE_ADDRESSABLE and setting/clearing DECL_NOT_GIMPLE_REG_P
	independently.
	* lto-streamer-out.c (hash_tree): Hash DECL_NOT_GIMPLE_REG_P.
	* tree-streamer-out.c (pack_ts_decl_common_value_fields): Stream
	DECL_NOT_GIMPLE_REG_P.
	* tree-streamer-in.c (unpack_ts_decl_common_value_fields):
	Likewise.
	* cfgexpand.c (avoid_type_punning_on_regs): New.
	(discover_nonconstant_array_refs): Call
	avoid_type_punning_on_regs to avoid unsupported mode punning.

lto/
	* lto-common.c (compare_tree_sccs_1): Compare
	DECL_NOT_GIMPLE_REG_P.

c/
	* gimple-parser.c (c_parser_parse_ssa_name): Do not set
	DECL_GIMPLE_REG_P.

cp/
	* optimize.c (update_cloned_parm): Copy DECL_NOT_GIMPLE_REG_P.

	* gcc.dg/tree-ssa/pr94703.c: New testcase.
---
 gcc/tree-sra.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 227bde0..4793b48 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2168,15 +2168,9 @@ create_access_replacement (struct access *access, tree reg_type = NULL_TREE)
	 variant.  This avoids issues with weirdo ABIs like AAPCS.  */
       repl = create_tmp_var (build_qualified_type (TYPE_MAIN_VARIANT (type),
						   TYPE_QUALS (type)), "SR");
-      if (TREE_CODE (type) == COMPLEX_TYPE
-	  || TREE_CODE (type) == VECTOR_TYPE)
-	{
-	  if (!access->grp_partial_lhs)
-	    DECL_GIMPLE_REG_P (repl) = 1;
-	}
-      else if (access->grp_partial_lhs
-	       && is_gimple_reg_type (type))
-	TREE_ADDRESSABLE (repl) = 1;
+      if (access->grp_partial_lhs
+	  && is_gimple_reg_type (type))
+	DECL_NOT_GIMPLE_REG_P (repl) = 1;

       DECL_SOURCE_LOCATION (repl) = DECL_SOURCE_LOCATION (access->base);
       DECL_ARTIFICIAL (repl) = 1;
--
cgit v1.1

From fb149ebdfee8995ed091f17cd64355ff54e9fb30 Mon Sep 17 00:00:00 2001
From: Eric Botcazou
Date: Mon, 15 Jun 2020 19:42:11 +0200
Subject: Fix ICE in verify_sra_access_forest

This fixes an issue with reverse storage order in SRA, which is caught
by the built-in verifier in verify_sra_access_forest.
The problem is that propagate_subaccesses_from_rhs changes the type of
an access from aggregate to scalar and, as discussed elsewhere, this
must be done with extra care in the presence of reverse storage order.

gcc/ChangeLog
	* tree-sra.c (propagate_subaccesses_from_rhs): When a non-scalar
	access on the LHS is replaced with a scalar access, propagate the
	TYPE_REVERSE_STORAGE_ORDER flag of the type of the original access.

gcc/testsuite/ChangeLog
	* gnat.dg/opt85.ads, gnat.dg/opt85.adb: New test.
---
 gcc/tree-sra.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'gcc/tree-sra.c')

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 4793b48..fcba7fb 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2758,6 +2758,9 @@ propagate_subaccesses_from_rhs (struct access *lacc, struct access *racc)
	}
       if (!lacc->first_child && !racc->first_child)
	{
+	  /* We are about to change the access type from aggregate to scalar,
+	     so we need to put the reverse flag onto the access, if any.  */
+	  const bool reverse = TYPE_REVERSE_STORAGE_ORDER (lacc->type);
	  tree t = lacc->base;

	  lacc->type = racc->type;
@@ -2772,9 +2775,12 @@
	      lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base),
						lacc->base, lacc->offset,
						racc, NULL, false);
+	      if (TREE_CODE (lacc->expr) == MEM_REF)
+		REF_REVERSE_STORAGE_ORDER (lacc->expr) = reverse;
	      lacc->grp_no_warning = true;
	      lacc->grp_same_access_path = false;
	    }
+	  lacc->reverse = reverse;
	}
     }
   return ret;
 }
--
cgit v1.1
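To make the reverse storage order scenario concrete: the committed test
is the Ada one named above, but the same situation can be sketched in
GNU C, where the scalar_storage_order type attribute produces
TYPE_REVERSE_STORAGE_ORDER record types.  The snippet below is an
illustrative reduction, not the committed testcase:

  /* On a little-endian target, R below has reverse storage order.  When
     SRA turns the aggregate copy through tmp into a scalar access, the
     substituted reference must carry REF_REVERSE_STORAGE_ORDER, which
     is what the hunk above propagates.  */
  struct __attribute__ ((scalar_storage_order ("big-endian"))) R
  {
    unsigned int x;
  };

  void
  f (struct R *p, struct R *q)
  {
    struct R tmp = *p;	/* aggregate copy; tmp is an SRA candidate */
    *q = tmp;		/* scalarized copy-out must keep the byte order */
  }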