author    Jakub Jelinek <jakub@redhat.com>    2013-12-10 12:46:01 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org>  2013-12-10 12:46:01 +0100
commit    5ce9450f162cef332dbd6534e7c3e246caee70c6 (patch)
tree      638f17e2a1b480505958dcbd6cca009ca6ade040 /gcc/tree-vectorizer.c
parent    ae3df2dc0a108b120304fe7725f58efc316fdd9a (diff)
tree-vectorizer.h (struct _loop_vec_info): Add scalar_loop field.
* tree-vectorizer.h (struct _loop_vec_info): Add scalar_loop field.
(LOOP_VINFO_SCALAR_LOOP): Define.
(slpeel_tree_duplicate_loop_to_edge_cfg): Add scalar_loop argument.
* config/i386/sse.md (maskload<mode>, maskstore<mode>): New expanders.
* tree-data-ref.c (get_references_in_stmt): Handle MASK_LOAD and
MASK_STORE.
* internal-fn.def (LOOP_VECTORIZED, MASK_LOAD, MASK_STORE): New
internal fns.
* tree-if-conv.c: Include expr.h, optabs.h, tree-ssa-loop-ivopts.h
and tree-ssa-address.h.
(release_bb_predicate): New function.
(free_bb_predicate): Use it.
(reset_bb_predicate): Likewise.  Don't free bb->aux just to
immediately allocate it again.
(add_to_predicate_list): Add loop argument.  Don't insert any
predicate for basic blocks that dominate loop->latch.
(add_to_dst_predicate_list): Adjust caller.
(if_convertible_phi_p): Add any_mask_load_store argument; if true,
handle it like flag_tree_loop_if_convert_stores.
(insert_gimplified_predicates): Likewise.
(ifcvt_can_use_mask_load_store): New function.
(if_convertible_gimple_assign_stmt_p): Add any_mask_load_store
argument, check if some conditional loads or stores can't be
converted into MASK_LOAD or MASK_STORE.
(if_convertible_stmt_p): Add any_mask_load_store argument, pass it
down to if_convertible_gimple_assign_stmt_p.
(predicate_bbs): Don't return bool; only check whether the last stmt
of a basic block is GIMPLE_COND and handle that.  Adjust
add_to_predicate_list caller.
(if_convertible_loop_p_1): Only call predicate_bbs if
flag_tree_loop_if_convert_stores, and call free_bb_predicate
afterwards in that case; check gimple_code of stmts here.  Replace
is_predicated check with dominance check.  Add any_mask_load_store
argument, pass it down to if_convertible_stmt_p and
if_convertible_phi_p; call if_convertible_phi_p only after all
if_convertible_stmt_p calls.
(if_convertible_loop_p): Add any_mask_load_store argument, pass it
down to if_convertible_loop_p_1.
(predicate_mem_writes): Emit MASK_LOAD and/or MASK_STORE calls.
(combine_blocks): Add any_mask_load_store argument, pass it down to
insert_gimplified_predicates, and call predicate_mem_writes if it is
set.  Call predicate_bbs.
(version_loop_for_if_conversion): New function.
(tree_if_conversion): Adjust if_convertible_loop_p and combine_blocks
calls.  Return todo flags instead of bool; call
version_loop_for_if_conversion if if-conversion should be just for
the vectorized loops and nothing else.
(main_tree_if_conversion): Adjust caller.  Don't call
tree_if_conversion for dont_vectorize loops if if-conversion isn't
explicitly enabled.
* tree-vect-data-refs.c (vect_check_gather): Handle
MASK_LOAD/MASK_STORE.
(vect_analyze_data_refs, vect_supportable_dr_alignment): Likewise.
* gimple.h (gimple_expr_type): Handle MASK_STORE.
* internal-fn.c (expand_LOOP_VECTORIZED, expand_MASK_LOAD,
expand_MASK_STORE): New functions.
* tree-vectorizer.c: Include tree-cfg.h and gimple-fold.h.
(vect_loop_vectorized_call, fold_loop_vectorized_call): New
functions.
(vectorize_loops): Don't try to vectorize loops with
loop->dont_vectorize set.  Set LOOP_VINFO_SCALAR_LOOP for
if-converted loops; fold the LOOP_VECTORIZED internal call depending
on whether the loop has been vectorized or not.
* tree-vect-loop-manip.c (slpeel_duplicate_current_defs_from_edges):
New function.
(slpeel_tree_duplicate_loop_to_edge_cfg): Add scalar_loop argument.
If non-NULL, copy basic blocks from scalar_loop instead of loop, but
still to loop's entry or exit edge.
(slpeel_tree_peel_loop_to_edge): Add scalar_loop argument, pass it
down to slpeel_tree_duplicate_loop_to_edge_cfg.
(vect_do_peeling_for_loop_bound, vect_do_peeling_for_loop_alignment):
Adjust callers.
(vect_loop_versioning): If LOOP_VINFO_SCALAR_LOOP, perform loop
versioning from that loop instead of LOOP_VINFO_LOOP, and move it to
the right place in the CFG afterwards.
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
MASK_STORE.
* cfgloop.h (struct loop): Add dont_vectorize field.
* tree-loop-distribution.c (copy_loop_before): Adjust
slpeel_tree_duplicate_loop_to_edge_cfg caller.
* optabs.def (maskload_optab, maskstore_optab): New optabs.
* passes.def: Add a note that pass_vectorize must immediately follow
pass_if_conversion.
* tree-predcom.c (split_data_refs_to_components): Give up if DR_STMT
is a call.
* tree-vect-stmts.c (vect_mark_relevant): Don't crash if lhs is NULL.
(exist_non_indexing_operands_for_use_p): Handle MASK_LOAD and
MASK_STORE.
(vectorizable_mask_load_store): New function.
(vectorizable_call): Call it for MASK_LOAD or MASK_STORE.
(vect_transform_stmt): Handle MASK_STORE.
* tree-ssa-phiopt.c (cond_if_else_store_replacement): Ignore DR_STMT
where lhs is NULL.
* optabs.h (can_vec_perm_p): Fix up comment typo.
(can_vec_mask_load_store_p): New prototype.
* optabs.c (can_vec_mask_load_store_p): New function.
* gcc.dg/vect/vect-cond-11.c: New test.
* gcc.target/i386/vect-cond-1.c: New test.
* gcc.target/i386/avx2-gather-5.c: New test.
* gcc.target/i386/avx2-gather-6.c: New test.
* gcc.dg/vect/vect-mask-loadstore-1.c: New test.
* gcc.dg/vect/vect-mask-load-1.c: New test.

From-SVN: r205856
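To make the effect concrete, here is a hypothetical kernel in the spirit of
the new tests such as gcc.dg/vect/vect-mask-loadstore-1.c (the function and
variable names below are illustrative, not taken from the patch).  Before
this change the conditionally executed load and store blocked vectorization;
with it, if-conversion can version the loop under a LOOP_VECTORIZED guard and
rewrite the accesses as MASK_LOAD/MASK_STORE internal calls, which the
vectorizer then expands through the new maskload<mode>/maskstore<mode>
patterns on targets that provide them (e.g. AVX2):

    /* Hypothetical example, e.g. compiled with -O3 -mavx2.  */
    void
    foo (float *__restrict a, float *__restrict b, int *__restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        if (c[i] > 0)
          a[i] = b[i] * 2.0f;  /* conditional load of b[i] -> MASK_LOAD,
                                  conditional store to a[i] -> MASK_STORE  */
    }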
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--  gcc/tree-vectorizer.c  121
1 file changed, 118 insertions(+), 3 deletions(-)
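
The tree-vectorizer.c changes below consume the guard that if-conversion now
emits.  A rough sketch of the CFG shape involved (the loop numbers, SSA names
and exact dump syntax here are illustrative only):

    _1 = LOOP_VECTORIZED (2, 3);   /* arg 0: if-converted loop's number,
                                      arg 1: scalar copy's number.  */
    if (_1 != 0)
      goto <if-converted loop 2>;
    else
      goto <scalar loop 3, dont_vectorize set>;

vect_loop_vectorized_call recovers this call by walking single-predecessor
blocks up from a loop's preheader and inspecting the statement just before
the GIMPLE_COND that ends the block.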
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 6cc07ab..1c411c4 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -75,11 +75,13 @@ along with GCC; see the file COPYING3. If not see
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-manip.h"
+#include "tree-cfg.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
+#include "gimple-fold.h"
/* Loop or bb location. */
source_location vect_location;
@@ -317,6 +319,60 @@ vect_destroy_datarefs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
}
+/* If LOOP has been versioned during ifcvt, return the internal call
+ guarding it. */
+
+static gimple
+vect_loop_vectorized_call (struct loop *loop)
+{
+ basic_block bb = loop_preheader_edge (loop)->src;
+ gimple g;
+ do
+ {
+ g = last_stmt (bb);
+ if (g)
+ break;
+ if (!single_pred_p (bb))
+ break;
+ bb = single_pred (bb);
+ }
+ while (1);
+ if (g && gimple_code (g) == GIMPLE_COND)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (g);
+ gsi_prev (&gsi);
+ if (!gsi_end_p (gsi))
+ {
+ g = gsi_stmt (gsi);
+ if (is_gimple_call (g)
+ && gimple_call_internal_p (g)
+ && gimple_call_internal_fn (g) == IFN_LOOP_VECTORIZED
+ && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
+ || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
+ return g;
+ }
+ }
+ return NULL;
+}
+
+/* Fold LOOP_VECTORIZED internal call G to VALUE and
+ update any immediate uses of its LHS. */
+
+static void
+fold_loop_vectorized_call (gimple g, tree value)
+{
+ tree lhs = gimple_call_lhs (g);
+ use_operand_p use_p;
+ imm_use_iterator iter;
+ gimple use_stmt;
+ gimple_stmt_iterator gsi = gsi_for_stmt (g);
+
+ update_call_from_tree (&gsi, value);
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, value);
+}
+
/* Function vectorize_loops.
Entry point to loop vectorization phase. */
@@ -330,6 +386,8 @@ vectorize_loops (void)
struct loop *loop;
hash_table <simduid_to_vf> simduid_to_vf_htab;
hash_table <simd_array_to_simduid> simd_array_to_simduid_htab;
+ bool any_ifcvt_loops = false;
+ unsigned ret = 0;
vect_loops_num = number_of_loops (cfun);
@@ -352,8 +410,11 @@ vectorize_loops (void)
than all previously defined loops. This fact allows us to run
only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (loop, 0)
- if ((flag_tree_loop_vectorize && optimize_loop_nest_for_speed_p (loop))
- || loop->force_vect)
+ if (loop->dont_vectorize)
+ any_ifcvt_loops = true;
+ else if ((flag_tree_loop_vectorize
+ && optimize_loop_nest_for_speed_p (loop))
+ || loop->force_vect)
{
loop_vec_info loop_vinfo;
vect_location = find_loop_location (loop);
@@ -372,6 +433,39 @@ vectorize_loops (void)
if (!dbg_cnt (vect_loop))
break;
+ gimple loop_vectorized_call = vect_loop_vectorized_call (loop);
+ if (loop_vectorized_call)
+ {
+ tree arg = gimple_call_arg (loop_vectorized_call, 1);
+ basic_block *bbs;
+ unsigned int i;
+ struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
+
+ LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
+ gcc_checking_assert (vect_loop_vectorized_call
+ (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+ == loop_vectorized_call);
+ bbs = get_loop_body (scalar_loop);
+ for (i = 0; i < scalar_loop->num_nodes; i++)
+ {
+ basic_block bb = bbs[i];
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ gimple_set_uid (phi, 0);
+ }
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ gimple_set_uid (stmt, 0);
+ }
+ }
+ free (bbs);
+ }
+
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ())
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
@@ -392,6 +486,12 @@ vectorize_loops (void)
*simduid_to_vf_htab.find_slot (simduid_to_vf_data, INSERT)
= simduid_to_vf_data;
}
+
+ if (loop_vectorized_call)
+ {
+ fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
+ ret |= TODO_cleanup_cfg;
+ }
}
vect_location = UNKNOWN_LOCATION;
@@ -405,6 +505,21 @@ vectorize_loops (void)
/* ----------- Finalize. ----------- */
+ if (any_ifcvt_loops)
+ for (i = 1; i < vect_loops_num; i++)
+ {
+ loop = get_loop (cfun, i);
+ if (loop && loop->dont_vectorize)
+ {
+ gimple g = vect_loop_vectorized_call (loop);
+ if (g)
+ {
+ fold_loop_vectorized_call (g, boolean_false_node);
+ ret |= TODO_cleanup_cfg;
+ }
+ }
+ }
+
for (i = 1; i < vect_loops_num; i++)
{
loop_vec_info loop_vinfo;
@@ -462,7 +577,7 @@ vectorize_loops (void)
return TODO_cleanup_cfg;
}
- return 0;
+ return ret;
}
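
A follow-up note on how the pieces fit together (using the hypothetical loop
numbers 2 and 3 from the sketch above): if loop 2 is successfully vectorized,
fold_loop_vectorized_call replaces the LOOP_VECTORIZED call with
boolean_true_node and rewrites all immediate uses of its lhs, so the guard
degenerates to "if (1 != 0)" and the TODO_cleanup_cfg returned here lets CFG
cleanup delete the dead scalar copy.  Otherwise the finalize walk over loops
with dont_vectorize set folds the same call to boolean_false_node, leaving
only the scalar version.  This is presumably also why passes.def now notes
that pass_vectorize must immediately follow pass_if_conversion: the guard has
to still be intact when vectorize_loops goes looking for it.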