aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-ssa-reassoc.c
diff options
context:
space:
mode:
authorKewen Lin <linkw@gcc.gnu.org>2019-07-15 05:12:05 +0000
committerKewen Lin <linkw@gcc.gnu.org>2019-07-15 05:12:05 +0000
commit6c2833e74e4e64a71bafaf6e20e65506bbce5a5c (patch)
tree72bf6558a965e41b38734da8eeb6753c2f53c690 /gcc/tree-ssa-reassoc.c
parent3126c241afaa38bc9002b3c15e244070b80af09d (diff)
downloadgcc-6c2833e74e4e64a71bafaf6e20e65506bbce5a5c.zip
gcc-6c2833e74e4e64a71bafaf6e20e65506bbce5a5c.tar.gz
gcc-6c2833e74e4e64a71bafaf6e20e65506bbce5a5c.tar.bz2
re PR tree-optimization/88497 (Improve Accumulation in Auto-Vectorized Code)
gcc/ChangeLog 2019-07-15 Kewen Lin <linkw@gcc.gnu.org> PR tree-optimization/88497 * tree-ssa-reassoc.c (reassociate_bb): Swap the positions of GIMPLE_BINARY_RHS check and gimple_visited_p check, call new function undistribute_bitref_for_vector. (undistribute_bitref_for_vector): New function. (cleanup_vinfo_map): Likewise. (sort_by_mach_mode): Likewise. gcc/testsuite/ChangeLog 2019-07-15 Kewen Lin <linkw@gcc.gnu.org> PR tree-optimization/88497 * gcc.dg/tree-ssa/pr88497-1.c: New test. * gcc.dg/tree-ssa/pr88497-2.c: Likewise. * gcc.dg/tree-ssa/pr88497-3.c: Likewise. * gcc.dg/tree-ssa/pr88497-4.c: Likewise. * gcc.dg/tree-ssa/pr88497-5.c: Likewise. * gcc.dg/tree-ssa/pr88497-6.c: Likewise. * gcc.dg/tree-ssa/pr88497-7.c: Likewise. From-SVN: r273490
Diffstat (limited to 'gcc/tree-ssa-reassoc.c')
-rw-r--r--gcc/tree-ssa-reassoc.c285
1 files changed, 279 insertions, 6 deletions
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 635fc93..df76e66 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -1772,6 +1772,274 @@ undistribute_ops_list (enum tree_code opcode,
return changed;
}
+/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
+   first: element index for each relevant BIT_FIELD_REF.
+   second: the index of vec ops* for each relevant BIT_FIELD_REF. */
+typedef std::pair<unsigned, unsigned> v_info_elem;
+/* All BIT_FIELD_REF summands drawn from one VECTOR SSA_NAME.  Instances
+   are heap-allocated as map payloads and released by cleanup_vinfo_map. */
+typedef auto_vec<v_info_elem, 32> v_info;
+typedef v_info *v_info_ptr;
+
+/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.
+   Ties on machine mode are broken by SSA_NAME_VERSION so that the ordering
+   is fully deterministic: host qsort is not a stable sort, and a comparator
+   that returns 0 for distinct inputs would make the candidate order (and
+   hence the generated code) depend on the host's qsort implementation.  */
+static int
+sort_by_mach_mode (const void *p_i, const void *p_j)
+{
+  const tree tr1 = *((const tree *) p_i);
+  const tree tr2 = *((const tree *) p_j);
+  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
+  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
+  if (mode1 > mode2)
+    return 1;
+  else if (mode1 < mode2)
+    return -1;
+  /* Same mode: fall back to SSA_NAME_VERSION for a total, stable order.  */
+  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
+    return -1;
+  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
+    return 1;
+  else
+    return 0;
+}
+
+/* Release every heap-allocated v_info payload held in INFO_MAP and null
+   out the corresponding slots; the map itself is owned by the caller.  */
+static void
+cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
+{
+  hash_map<tree, v_info_ptr>::iterator iter = info_map.begin ();
+  for (; iter != info_map.end (); ++iter)
+    {
+      delete (*iter).second;
+      (*iter).second = NULL;
+    }
+}
+
+/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
+     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
+   is transformed to
+     Vs = (V1 + V2 + ... + Vn)
+     Vs[0] + Vs[1] + ... + Vs[k]
+
+   The basic steps are listed below:
+
+    1) Check the addition chain *OPS by looking those summands coming from
+       VECTOR bit_field_ref on VECTOR type.  Put the information into
+       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
+
+    2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
+       continuous, they can cover the whole VECTOR perfectly without any holes.
+       Obtain one VECTOR list which contain candidates to be transformed.
+
+    3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
+       candidates with same mode, build the addition statements for them and
+       generate BIT_FIELD_REFs accordingly.
+
+   TODO:
+       The current implementation requires each candidate VECTOR to be fully
+       covered by its BIT_FIELD_REFs.  It could be extended to accept partial
+       coverage with adjacent elements, which would need a cost model to
+       decide when the transformation still pays off.  */
+static bool
+undistribute_bitref_for_vector (enum tree_code opcode,
+				vec<operand_entry *> *ops, struct loop *loop)
+{
+  /* Nothing to combine from a chain of zero or one summand.  */
+  if (ops->length () <= 1)
+    return false;
+
+  /* Only these opcodes have a vector counterpart and a neutral element
+     we can substitute for the replaced scalar summands below.  */
+  if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
+      && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
+    return false;
+
+  hash_map<tree, v_info_ptr> v_info_map;
+  operand_entry *oe1;
+  unsigned i;
+
+  /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
+     information into map.  */
+  FOR_EACH_VEC_ELT (*ops, i, oe1)
+    {
+      enum tree_code dcode;
+      gimple *oe1def;
+
+      if (TREE_CODE (oe1->op) != SSA_NAME)
+	continue;
+      oe1def = SSA_NAME_DEF_STMT (oe1->op);
+      if (!is_gimple_assign (oe1def))
+	continue;
+      dcode = gimple_assign_rhs_code (oe1def);
+      if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
+	continue;
+
+      tree rhs = gimple_assign_rhs1 (oe1def);
+      tree vec = TREE_OPERAND (rhs, 0);
+      tree vec_type = TREE_TYPE (vec);
+
+      if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
+	continue;
+
+      /* Ignore it if target machine can't support this VECTOR type.  */
+      if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
+	continue;
+
+      /* Check const vector type, constrain BIT_FIELD_REF offset and size.  */
+      if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
+	continue;
+
+      tree elem_type = TREE_TYPE (vec_type);
+      unsigned HOST_WIDE_INT elem_size
+	= TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+      /* The BIT_FIELD_REF must read exactly one vector element.  */
+      if (maybe_ne (bit_field_size (rhs), elem_size))
+	continue;
+
+      /* ... at an element-aligned offset, yielding the element index.  */
+      unsigned idx;
+      if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
+	continue;
+
+      /* Ignore it if target machine can't support this type of VECTOR
+         operation.  */
+      optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
+      if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
+	continue;
+
+      bool existed;
+      v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
+      if (!existed)
+	info = new v_info;
+      info->safe_push (std::make_pair (idx, i));
+    }
+
+  /* At least two VECTOR to combine.  */
+  if (v_info_map.elements () <= 1)
+    {
+      cleanup_vinfo_map (v_info_map);
+      return false;
+    }
+
+  /* Verify all VECTOR candidates by checking two conditions:
+       1) sorted offsets are adjacent, no holes.
+       2) can fill the whole VECTOR perfectly.
+     And add the valid candidates to a vector for further handling.  */
+  auto_vec<tree> valid_vecs (v_info_map.elements ());
+  for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
+       it != v_info_map.end (); ++it)
+    {
+      tree cand_vec = (*it).first;
+      v_info_ptr cand_info = (*it).second;
+      /* CAND_VEC is an SSA_NAME, so the element count must come from its
+	 type; VECTOR_CST_NELTS is only valid on VECTOR_CST nodes.  The
+	 count is known constant: non-constant TYPE_VECTOR_SUBPARTS were
+	 filtered out when building the map.  */
+      unsigned int num_elems
+	= TYPE_VECTOR_SUBPARTS (TREE_TYPE (cand_vec)).to_constant ();
+      if (cand_info->length () != num_elems)
+	continue;
+      sbitmap holes = sbitmap_alloc (num_elems);
+      bitmap_ones (holes);
+      bool valid = true;
+      v_info_elem *curr;
+      FOR_EACH_VEC_ELT (*cand_info, i, curr)
+	{
+	  /* A bit already cleared means a duplicated element index.  */
+	  if (!bitmap_bit_p (holes, curr->first))
+	    {
+	      valid = false;
+	      break;
+	    }
+	  else
+	    bitmap_clear_bit (holes, curr->first);
+	}
+      if (valid && bitmap_empty_p (holes))
+	valid_vecs.quick_push (cand_vec);
+      sbitmap_free (holes);
+    }
+
+  /* At least two VECTOR to combine.  */
+  if (valid_vecs.length () <= 1)
+    {
+      cleanup_vinfo_map (v_info_map);
+      return false;
+    }
+
+  valid_vecs.qsort (sort_by_mach_mode);
+  /* Go through all candidates by machine mode order, query the mode_to_total
+     to get the total number for each mode and skip the single one.  */
+  for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
+    {
+      tree tvec = valid_vecs[i];
+      enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
+
+      /* Skip modes with only a single candidate.  */
+      if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
+	continue;
+
+      unsigned int idx, j;
+      gimple *sum = NULL;
+      v_info_ptr info_ptr;
+      tree sum_vec = tvec;
+      v_info_elem *elem;
+
+      /* Build the sum for all candidates with same mode.  */
+      do
+	{
+	  sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
+				   valid_vecs[i + 1], opcode);
+	  sum_vec = gimple_get_lhs (sum);
+	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
+	  /* Update those related ops of current candidate VECTOR.  */
+	  FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+	    {
+	      idx = elem->second;
+	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
+	      /* Set this then op definition will get DCEd later.  */
+	      gimple_set_visited (def, true);
+	      /* Replace the summand by the neutral element of OPCODE so the
+		 folded-in vector is not counted twice in the chain.  */
+	      if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
+		  || opcode == BIT_IOR_EXPR)
+		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
+	      else if (opcode == MULT_EXPR)
+		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
+	      else
+		{
+		  gcc_assert (opcode == BIT_AND_EXPR);
+		  (*ops)[idx]->op
+		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
+		}
+	      (*ops)[idx]->rank = 0;
+	    }
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Generating addition -> ");
+	      print_gimple_stmt (dump_file, sum, 0);
+	    }
+	  i++;
+	}
+      while ((i < valid_vecs.length () - 1)
+	     && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
+
+      /* Referring to first valid VECTOR with this mode, generate the
+         BIT_FIELD_REF statements accordingly.  */
+      info_ptr = *(v_info_map.get (tvec));
+      gcc_assert (sum);
+      tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
+      FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+	{
+	  idx = elem->second;
+	  tree dst = make_ssa_name (elem_type);
+	  gimple *gs = gimple_build_assign (
+	    dst, BIT_FIELD_REF,
+	    build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
+		    bitsize_int (elem->first
+				 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
+	  insert_stmt_after (gs, sum);
+	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
+	  /* Set this then op definition will get DCEd later.  */
+	  gimple_set_visited (def, true);
+	  (*ops)[idx]->op = gimple_assign_lhs (gs);
+	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Generating bit_field_ref -> ");
+	      print_gimple_stmt (dump_file, gs, 0);
+	    }
+	}
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
+
+  cleanup_vinfo_map (v_info_map);
+
+  return true;
+}
+
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
expression, examine the other OPS to see if any of them are comparisons
of the same values, which we may be able to combine or eliminate.
@@ -5879,11 +6147,6 @@ reassociate_bb (basic_block bb)
tree lhs, rhs1, rhs2;
enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
- /* If this is not a gimple binary expression, there is
- nothing for us to do with it. */
- if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
- continue;
-
/* If this was part of an already processed statement,
we don't need to touch it again. */
if (gimple_visited_p (stmt))
@@ -5910,6 +6173,11 @@ reassociate_bb (basic_block bb)
continue;
}
+ /* If this is not a gimple binary expression, there is
+ nothing for us to do with it. */
+ if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
+ continue;
+
lhs = gimple_assign_lhs (stmt);
rhs1 = gimple_assign_rhs1 (stmt);
rhs2 = gimple_assign_rhs2 (stmt);
@@ -5948,7 +6216,12 @@ reassociate_bb (basic_block bb)
ops.qsort (sort_by_operand_rank);
optimize_ops_list (rhs_code, &ops);
}
-
+ if (undistribute_bitref_for_vector (rhs_code, &ops,
+ loop_containing_stmt (stmt)))
+ {
+ ops.qsort (sort_by_operand_rank);
+ optimize_ops_list (rhs_code, &ops);
+ }
if (rhs_code == PLUS_EXPR
&& transform_add_to_multiply (&ops))
ops.qsort (sort_by_operand_rank);