diff options
Diffstat (limited to 'gcc/tree-vect-slp.cc')
| -rw-r--r-- | gcc/tree-vect-slp.cc | 116 |
1 files changed, 111 insertions, 5 deletions
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 66c4518..aa6c3e2 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -558,7 +558,8 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false, if (gimple_assign_rhs_code (assign) == COND_EXPR && COMPARISON_CLASS_P (gimple_assign_rhs1 (assign))) gcc_unreachable (); - if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison + if ((TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison + || commutative_tree_code (gimple_assign_rhs_code (assign))) && swap) return op1_op0_map; if (gather_scatter_p) @@ -1352,7 +1353,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, uniform but only that of the first stmt matters. */ && !(first_reduc_idx != -1 && STMT_VINFO_REDUC_IDX (stmt_info) != -1 - && REDUC_GROUP_FIRST_ELEMENT (stmt_info))) + && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + && !(first_reduc_idx != -1 + && STMT_VINFO_REDUC_IDX (stmt_info) != -1 + && rhs_code.is_tree_code () + && commutative_tree_code (tree_code (rhs_code)) + && first_reduc_idx == 1 - STMT_VINFO_REDUC_IDX (stmt_info))) { if (dump_enabled_p ()) { @@ -1617,6 +1623,15 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && (swap_tree_comparison ((tree_code)first_stmt_code) == (tree_code)rhs_code)) swap[i] = 1; + + if (i != 0 + && first_reduc_idx != STMT_VINFO_REDUC_IDX (stmt_info) + && first_reduc_idx != -1 + && STMT_VINFO_REDUC_IDX (stmt_info) != -1 + && rhs_code.is_tree_code () + && commutative_tree_code (tree_code (rhs_code)) + && first_reduc_idx == 1 - STMT_VINFO_REDUC_IDX (stmt_info)) + swap[i] = 1; } matches[i] = true; @@ -4164,6 +4179,10 @@ vect_build_slp_instance (vec_info *vinfo, if (dump_enabled_p ()) { + if (kind == slp_inst_kind_reduc_group) + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery of size %d reduction group " + "succeeded\n", group_size); dump_printf_loc (MSG_NOTE, vect_location, "Final SLP tree for instance %p:\n", (void *) new_instance); @@ -6302,6 +6321,7 @@ private: /* Layout selection. */ bool is_compatible_layout (slp_tree, unsigned int); + bool is_compatible_layout (const slpg_partition_info &, unsigned int); int change_layout_cost (slp_tree, unsigned int, unsigned int); slpg_partition_layout_costs &partition_layout_costs (unsigned int, unsigned int); @@ -6309,6 +6329,7 @@ private: int, unsigned int); int internal_node_cost (slp_tree, int, unsigned int); void start_choosing_layouts (); + bool legitimize (); /* Cost propagation. */ slpg_layout_cost edge_layout_cost (graph_edge *, unsigned int, @@ -6715,6 +6736,29 @@ vect_optimize_slp_pass::is_compatible_layout (slp_tree node, return true; } +/* Return true if layout LAYOUT_I is compatible with the number of SLP lanes + that NODE would operate on for each NODE in PARTITION. + This test is independent of NODE's actual operations. */ + +bool +vect_optimize_slp_pass::is_compatible_layout (const slpg_partition_info + &partition, + unsigned int layout_i) +{ + for (unsigned int order_i = partition.node_begin; + order_i < partition.node_end; ++order_i) + { + unsigned int node_i = m_partitioned_nodes[order_i]; + auto &vertex = m_vertices[node_i]; + + /* The layout is incompatible if it is individually incompatible + with any node in the partition. */ + if (!is_compatible_layout (vertex.node, layout_i)) + return false; + } + return true; +} + /* Return the cost (in arbtirary units) of going from layout FROM_LAYOUT_I to layout TO_LAYOUT_I for a node like NODE. Return -1 if either of the layouts is incompatible with NODE or if the change is not possible for @@ -8034,6 +8078,62 @@ vect_optimize_slp_pass::decide_masked_load_lanes () } } +/* Perform legitimizing attempts. This is intended to improve the + situation when layout 0 is not valid which is a situation the cost + based propagation does not handle well. + Return true if further layout optimization is possible, false if + the layout configuration should be considered final. */ + +bool +vect_optimize_slp_pass::legitimize () +{ + /* Perform a very simple legitimizing attempt by attempting to choose + a single layout for all partitions that will make all permutations + a noop. That should also be the optimal layout choice in case + layout zero is legitimate. + ??? Disconnected components of the SLP graph could have distinct + single layouts. */ + int single_layout_i = -1; + unsigned deferred_up_to = -1U; + for (unsigned partition_i = 0; partition_i < m_partitions.length (); + ++partition_i) + { + auto &partition = m_partitions[partition_i]; + if (single_layout_i == -1) + { + single_layout_i = partition.layout; + deferred_up_to = partition_i; + } + else if (partition.layout == single_layout_i || partition.layout == -1) + ; + else + single_layout_i = 0; + if (single_layout_i == 0) + return true; + + if (single_layout_i != -1 + && !is_compatible_layout (partition, single_layout_i)) + return true; + } + + if (single_layout_i <= 0) + return true; + + for (unsigned partition_i = 0; partition_i < deferred_up_to; ++partition_i) + if (!is_compatible_layout (m_partitions[partition_i], + single_layout_i)) + return true; + + for (unsigned partition_i = 0; partition_i < m_partitions.length (); + ++partition_i) + { + auto &partition = m_partitions[partition_i]; + partition.layout = single_layout_i; + } + + return false; +} + /* Main entry point for the SLP graph optimization pass. */ void @@ -8044,8 +8144,11 @@ vect_optimize_slp_pass::run () start_choosing_layouts (); if (m_perms.length () > 1) { - forward_pass (); - backward_pass (); + if (legitimize ()) + { + forward_pass (); + backward_pass (); + } if (dump_enabled_p ()) dump (); materialize (); @@ -9036,8 +9139,11 @@ vect_slp_analyze_operations (vec_info *vinfo) stmt_vec_info stmt_info; if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ()) stmt_info = SLP_INSTANCE_ROOT_STMTS (instance)[0]; - else + else if (!SLP_TREE_SCALAR_STMTS (node).is_empty () + && SLP_TREE_SCALAR_STMTS (node)[0]) stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; + else + stmt_info = SLP_TREE_REPRESENTATIVE (node); if (is_a <loop_vec_info> (vinfo)) { if (dump_enabled_p ()) |
