about | summary | refs | log | tree | commit | diff
path: root/gcc/tree-vectorizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vectorizer.h')
-rw-r--r-- gcc/tree-vectorizer.h 238
1 files changed, 209 insertions, 29 deletions
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b861c97..7453d2a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -27,7 +27,7 @@ typedef class _stmt_vec_info *stmt_vec_info;
#include "tree-hash-traits.h"
#include "target.h"
#include "internal-fn.h"
-
+#include "tree-ssa-operands.h"
/* Used for naming of new temporaries. */
enum vect_var_kind {
@@ -106,10 +106,11 @@ struct stmt_info_for_cost {
typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
-/* Maps base addresses to an innermost_loop_behavior that gives the maximum
- known alignment for that base. */
+/* Maps base addresses to an innermost_loop_behavior and the stmt it was
+ derived from that gives the maximum known alignment for that base. */
typedef hash_map<tree_operand_hash,
- innermost_loop_behavior *> vec_base_alignments;
+ std::pair<stmt_vec_info, innermost_loop_behavior *> >
+ vec_base_alignments;
/************************************************************************
SLP
@@ -167,6 +168,11 @@ struct _slp_tree {
int vertex;
+ /* If not NULL this is a cached failed SLP discovery attempt with
+ the lanes that failed during SLP discovery as 'false'. This is
+ a copy of the matches array. */
+ bool *failed;
+
/* Allocate from slp_tree_pool. */
static void *operator new (size_t);
@@ -185,6 +191,7 @@ enum slp_instance_kind {
slp_inst_kind_store,
slp_inst_kind_reduc_group,
slp_inst_kind_reduc_chain,
+ slp_inst_kind_bb_reduc,
slp_inst_kind_ctor
};
@@ -197,7 +204,7 @@ public:
/* For vector constructors, the constructor stmt that the SLP tree is built
from, NULL otherwise. */
- stmt_vec_info root_stmt;
+ vec<stmt_vec_info> root_stmts;
/* The unrolling factor required to vectorize this SLP instance. */
poly_uint64 unrolling_factor;
@@ -226,7 +233,7 @@ public:
#define SLP_INSTANCE_TREE(S) (S)->root
#define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor
#define SLP_INSTANCE_LOADS(S) (S)->loads
-#define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt
+#define SLP_INSTANCE_ROOT_STMTS(S) (S)->root_stmts
#define SLP_INSTANCE_KIND(S) (S)->kind
#define SLP_TREE_CHILDREN(S) (S)->children
@@ -545,6 +552,18 @@ typedef auto_vec<rgroup_controls> vec_loop_lens;
typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
+/* Information about a reduction accumulator from the main loop that could
+ conceivably be reused as the input to a reduction in an epilogue loop.
+ This type is the mapped type of the reusable_accumulators hash_map and
+ the pointee of the reused_accumulator field declared in this header. */
+struct vect_reusable_accumulator {
+ /* The final value of the accumulator, which forms the input to the
+ reduction operation. */
+ tree reduc_input;
+
+ /* The stmt_vec_info that describes the reduction (i.e. the one for
+ which is_reduc_info is true). */
+ stmt_vec_info reduc_info;
+};
+
/*-----------------------------------------------------------------*/
/* Info on vectorized loops. */
/*-----------------------------------------------------------------*/
@@ -582,6 +601,26 @@ public:
/* Unrolling factor */
poly_uint64 vectorization_factor;
+ /* If this loop is an epilogue loop whose main loop can be skipped,
+ MAIN_LOOP_EDGE is the edge from the main loop to this loop's
+ preheader. SKIP_MAIN_LOOP_EDGE is then the edge that skips the
+ main loop and goes straight to this loop's preheader.
+
+ Both fields are null otherwise. */
+ edge main_loop_edge;
+ edge skip_main_loop_edge;
+
+ /* If this loop is an epilogue loop that might be skipped after executing
+ the main loop, this edge is the one that skips the epilogue. */
+ edge skip_this_loop_edge;
+
+ /* The vectorized form of a standard reduction replaces the original
+ scalar code's final result (a loop-closed SSA PHI) with the result
+ of a vector-to-scalar reduction operation. After vectorization,
+ this variable maps these vector-to-scalar results to information
+ about the reductions that generated them. */
+ hash_map<tree, vect_reusable_accumulator> reusable_accumulators;
+
/* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
if there is no particular limit. */
unsigned HOST_WIDE_INT max_vectorization_factor;
@@ -689,6 +728,10 @@ public:
/* The cost of the vector loop body. */
int vec_inside_cost;
+ /* The factor used to over weight those statements in an inner loop
+ relative to the loop being vectorized. */
+ unsigned int inner_loop_cost_factor;
+
/* Is the loop vectorizable? */
bool vectorizable;
@@ -807,6 +850,7 @@ public:
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
+#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
#define LOOP_VINFO_FULLY_MASKED_P(L) \
(LOOP_VINFO_USING_PARTIAL_VECTORS_P (L) \
@@ -856,11 +900,11 @@ loop_vec_info_for_loop (class loop *loop)
struct slp_root
{
slp_root (slp_instance_kind kind_, vec<stmt_vec_info> stmts_,
- stmt_vec_info root_)
- : kind(kind_), stmts(stmts_), root(root_) {}
+ vec<stmt_vec_info> roots_)
+ : kind(kind_), stmts(stmts_), roots(roots_) {}
slp_instance_kind kind;
vec<stmt_vec_info> stmts;
- stmt_vec_info root;
+ vec<stmt_vec_info> roots;
};
typedef class _bb_vec_info : public vec_info
@@ -1016,6 +1060,9 @@ public:
data_reference *dr;
/* The statement that contains the data reference. */
stmt_vec_info stmt;
+ /* The analysis group this DR belongs to when doing BB vectorization.
+ DRs of the same group belong to the same conditional execution context. */
+ unsigned group;
/* The misalignment in bytes of the reference, or -1 if not known. */
int misalignment;
/* The byte alignment that we'd ideally like the reference to have,
@@ -1175,6 +1222,23 @@ public:
/* The vector type for performing the actual reduction. */
tree reduc_vectype;
+ /* If IS_REDUC_INFO is true and if the vector code is performing
+ N scalar reductions in parallel, this variable gives the initial
+ scalar values of those N reductions. */
+ vec<tree> reduc_initial_values;
+
+ /* If IS_REDUC_INFO is true and if the vector code is performing
+ N scalar reductions in parallel, this variable gives the vectorized code's
+ final (scalar) result for each of those N reductions. In other words,
+ REDUC_SCALAR_RESULTS[I] replaces the original scalar code's loop-closed
+ SSA PHI for reduction number I. */
+ vec<tree> reduc_scalar_results;
+
+ /* Only meaningful if IS_REDUC_INFO. If non-null, the reduction is
+ being performed by an epilogue loop and we have decided to reuse
+ this accumulator from the main loop. */
+ vect_reusable_accumulator *reused_accumulator;
+
/* Whether we force a single cycle PHI during reduction vectorization. */
bool force_single_cycle;
@@ -1358,6 +1422,19 @@ nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info)
&& (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
}
+/* PHI is either a scalar reduction phi or a scalar induction phi.
+ Return the initial value of the variable on entry to the containing
+ loop. The value is read from the PHI argument on the preheader edge
+ of the loop that owns PHI's basic block; the function asserts that
+ this edge enters that block. */
+
+static inline tree
+vect_phi_initial_value (gphi *phi)
+{
+ basic_block bb = gimple_bb (phi);
+ edge pe = loop_preheader_edge (bb->loop_father);
+ gcc_assert (pe->dest == bb);
+ return PHI_ARG_DEF_FROM_EDGE (phi, pe);
+}
+
/* Return true if STMT_INFO should produce a vector mask type rather than
a normal nonmask type. */
@@ -1455,9 +1532,9 @@ int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
/* Alias targetm.vectorize.init_cost. */
static inline void *
-init_cost (class loop *loop_info)
+init_cost (class loop *loop_info, bool costing_for_scalar)
{
- return targetm.vectorize.init_cost (loop_info);
+ return targetm.vectorize.init_cost (loop_info, costing_for_scalar);
}
extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
@@ -1788,6 +1865,7 @@ class loop *vect_loop_versioning (loop_vec_info, gimple *);
extern class loop *vect_do_peeling (loop_vec_info, tree, tree,
tree *, tree *, tree *, int, bool, bool,
tree *);
+extern tree vect_get_main_loop_result (loop_vec_info, tree, tree);
extern void vect_prepare_for_masked_peels (loop_vec_info);
extern dump_user_location_t find_loop_location (class loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info);
@@ -1874,7 +1952,8 @@ extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
extern void optimize_mask_stores (class loop*);
-extern gcall *vect_gen_while (tree, tree, tree);
+extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
+ const char * = nullptr);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
extern opt_result vect_get_vector_types_for_stmt (vec_info *,
stmt_vec_info, tree *,
@@ -1885,8 +1964,7 @@ extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
extern enum dr_alignment_support vect_supportable_dr_alignment
(vec_info *, dr_vec_info *, bool);
-extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *,
- HOST_WIDE_INT *);
+extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
@@ -1916,8 +1994,8 @@ extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
-extern void vect_permute_store_chain (vec_info *,
- vec<tree> ,unsigned int, stmt_vec_info,
+extern void vect_permute_store_chain (vec_info *, vec<tree> &,
+ unsigned int, stmt_vec_info,
gimple_stmt_iterator *, vec<tree> *);
extern tree vect_setup_realignment (vec_info *,
stmt_vec_info, gimple_stmt_iterator *,
@@ -1961,6 +2039,7 @@ extern tree vect_get_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
unsigned int);
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
+extern bool reduction_fn_for_scalar_code (enum tree_code, internal_fn *);
/* Driver for loop transformation stage. */
extern class loop *vect_transform_loop (loop_vec_info, gimple *);
@@ -1986,7 +2065,8 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
gimple **, slp_tree);
extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
stmt_vector_for_cost *);
-extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
+extern bool vect_emulated_vector_p (tree);
+extern bool vect_can_vectorize_without_simd_p (tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *,
stmt_vector_for_cost *,
@@ -1997,12 +2077,12 @@ extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
extern void vect_slp_init (void);
extern void vect_slp_fini (void);
extern void vect_free_slp_instance (slp_instance);
-extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, vec<tree>,
+extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
gimple_stmt_iterator *, poly_uint64,
bool, unsigned *,
- unsigned * = nullptr);
+ unsigned * = nullptr, bool = false);
extern bool vect_slp_analyze_operations (vec_info *);
-extern void vect_schedule_slp (vec_info *, vec<slp_instance>);
+extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
extern opt_result vect_analyze_slp (vec_info *, unsigned);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
@@ -2011,7 +2091,7 @@ extern void vect_gather_slp_loads (vec_info *);
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
unsigned n = -1U);
-extern bool vect_slp_bb (basic_block);
+extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
extern bool vect_slp_function (function *);
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
@@ -2020,7 +2100,7 @@ extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
unsigned int * = NULL,
tree * = NULL, tree * = NULL);
extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
- vec<tree>, unsigned int, vec<tree> &);
+ const vec<tree> &, unsigned int, vec<tree> &);
extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
extern bool vect_update_shared_vectype (stmt_vec_info, tree);
extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
@@ -2059,13 +2139,8 @@ typedef enum _complex_perm_kinds {
PERM_TOP
} complex_perm_kinds_t;
-/* A pair with a load permute and a corresponding complex_perm_kind which gives
- information about the load it represents. */
-typedef std::pair<complex_perm_kinds_t, load_permutation_t>
- complex_load_perm_t;
-
/* Cache from nodes to the load permutation they represent. */
-typedef hash_map <slp_tree, complex_load_perm_t>
+typedef hash_map <slp_tree, complex_perm_kinds_t>
slp_tree_to_load_perm_map_t;
/* Vector pattern matcher base class. All SLP pattern matchers must inherit
@@ -2093,7 +2168,8 @@ class vect_pattern
this->m_ifn = ifn;
this->m_node = node;
this->m_ops.create (0);
- this->m_ops.safe_splice (*m_ops);
+ if (m_ops)
+ this->m_ops.safe_splice (*m_ops);
}
public:
@@ -2121,4 +2197,108 @@ extern vect_pattern_decl_t slp_patterns[];
/* Number of supported pattern matchers. */
extern size_t num__slp_patterns;
+/* ----------------------------------------------------------------------
+ Target support routines
+ -----------------------------------------------------------------------
+ The following routines are provided to simplify costing decisions in
+ target code. Please add more as needed. */
+
+/* Return true if an operation of kind KIND for STMT_INFO represents
+ the extraction of an element from a vector in preparation for
+ storing the element to memory. This is the case when KIND is
+ vec_to_scalar and STMT_INFO has a data reference that is a write. */
+inline bool
+vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
+{
+ return (kind == vec_to_scalar
+ && STMT_VINFO_DATA_REF (stmt_info)
+ && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)));
+}
+
+/* Return true if STMT_INFO represents part of a reduction. This holds
+ when STMT_VINFO_REDUC_DEF is set for STMT_INFO or when its definition
+ type satisfies VECTORIZABLE_CYCLE_DEF. */
+inline bool
+vect_is_reduction (stmt_vec_info stmt_info)
+{
+ return (STMT_VINFO_REDUC_DEF (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)));
+}
+
+/* If STMT_INFO describes a reduction, return the vect_reduction_type
+ of the reduction it describes, otherwise return -1. A reduction type
+ is only returned when VINFO is a loop_vec_info and STMT_VINFO_REDUC_DEF
+ is set for STMT_INFO; the type is read from the stmt_vec_info returned
+ by info_for_reduction and converted to int. */
+inline int
+vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info)
+{
+ if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
+ if (STMT_VINFO_REDUC_DEF (stmt_info))
+ {
+ stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info);
+ return int (STMT_VINFO_REDUC_TYPE (reduc_info));
+ }
+ return -1;
+}
+
+/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the
+ scalar type of the values being compared. Return null otherwise.
+ The statement must be a gassign whose rhs code is COND_EXPR and whose
+ first rhs operand is a comparison-class tree; the type returned is that
+ of the comparison's operand 0. */
+inline tree
+vect_embedded_comparison_type (stmt_vec_info stmt_info)
+{
+ if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
+ if (gimple_assign_rhs_code (assign) == COND_EXPR)
+ {
+ tree cond = gimple_assign_rhs1 (assign);
+ if (COMPARISON_CLASS_P (cond))
+ return TREE_TYPE (TREE_OPERAND (cond, 0));
+ }
+ return NULL_TREE;
+}
+
+/* If STMT_INFO is a comparison or contains an embedded comparison, return the
+ scalar type of the values being compared. Return null otherwise.
+ A gassign whose rhs code is in class tcc_comparison yields the type of
+ its first rhs operand; every other statement is handed to
+ vect_embedded_comparison_type. */
+inline tree
+vect_comparison_type (stmt_vec_info stmt_info)
+{
+ if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt))
+ if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
+ return TREE_TYPE (gimple_assign_rhs1 (assign));
+ return vect_embedded_comparison_type (stmt_info);
+}
+
+/* Return true if STMT_INFO extends the result of a load. STMT_INFO must
+ be a conversion assignment between integral types whose destination
+ precision is strictly greater than its source precision, and the
+ definition of the source operand, looked up through VINFO, must have
+ a data reference that is a read. */
+inline bool
+vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info)
+{
+ /* Although this is quite large for an inline function, this part
+ at least should be inline. */
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+ return false;
+
+ tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+ tree rhs_type = TREE_TYPE (rhs);
+ if (!INTEGRAL_TYPE_P (lhs_type)
+ || !INTEGRAL_TYPE_P (rhs_type)
+ || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
+ return false;
+
+ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
+ return (def_stmt_info
+ && STMT_VINFO_DATA_REF (def_stmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
+}
+
+/* Return true if STMT_INFO is an integer truncation, i.e. a conversion
+ assignment between integral types whose destination precision is
+ strictly smaller than its source precision. */
+inline bool
+vect_is_integer_truncation (stmt_vec_info stmt_info)
+{
+ gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+ if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+ return false;
+
+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+ tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
+ return (INTEGRAL_TYPE_P (lhs_type)
+ && INTEGRAL_TYPE_P (rhs_type)
+ && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
+}
+
#endif /* GCC_TREE_VECTORIZER_H */