aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPan Li <pan2.li@intel.com>2024-05-16 09:58:13 +0800
committerPan Li <pan2.li@intel.com>2024-05-16 20:25:31 +0800
commit57f8a2f67c1536be23231808ab00613ab69193ed (patch)
treef6e763ad94328a1947d62c63e97741acc6c59732
parentd4dee347b3fe1982bab26485ff31cd039c9df010 (diff)
downloadgcc-57f8a2f67c1536be23231808ab00613ab69193ed.zip
gcc-57f8a2f67c1536be23231808ab00613ab69193ed.tar.gz
gcc-57f8a2f67c1536be23231808ab00613ab69193ed.tar.bz2
Vect: Support loop len in vectorizable early exit
This patch adds early break auto-vectorization support for targets which use length on partial vectorization. Consider the following example: unsigned vect_a[802]; unsigned vect_b[802]; void test (unsigned x, int n) { for (int i = 0; i < n; i++) { vect_b[i] = x + i; if (vect_a[i] > x) break; vect_a[i] = x; } } We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias). And then the IR of RVV looks like below: ... _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]); _55 = (int) _87; ... mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67; vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \ {0, ... }, _87, 0); if (vec_len_mask_72 != { 0, ... }) goto <bb 6>; [5.50%] else goto <bb 7>; [94.50%] The tests below pass with this patch: 1. The riscv full regression tests. 2. The x86 bootstrap tests. 3. The x86 full regression tests. gcc/ChangeLog: * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen the loop len mask. * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the vect_gen_loop_len_mask for 1 or more stmt(s). * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl for vect_gen_loop_len_mask. Signed-off-by: Pan Li <pan2.li@intel.com>
-rw-r--r--gcc/tree-vect-loop.cc27
-rw-r--r--gcc/tree-vect-stmts.cc17
-rw-r--r--gcc/tree-vectorizer.h4
3 files changed, 46 insertions, 2 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 361aec0..83c0544 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
return loop_len;
}
+/* Build a .VCOND_MASK_LEN call that applies the loop length obtained from
+ vect_get_loop_len (LOOP_VINFO, GSI, LENS, NVECTORS, VECTYPE, INDEX, FACTOR)
+ and the partial load/store bias to the mask STMT, insert the call before
+ COND_GSI and return its result, a new SSA name of STMT's type:
+ tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)  */
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+ gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+ unsigned int nvectors, tree vectype, tree stmt,
+ unsigned int index, unsigned int factor)
+{
+ tree all_one_mask = build_all_ones_cst (vectype);
+ tree all_zero_mask = build_zero_cst (vectype);
+ tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
+ factor);
+ tree bias = build_int_cst (intQI_type_node,
+ LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+ tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+ gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+ all_one_mask, all_zero_mask, len,
+ bias);
+ gimple_call_set_lhs (call, len_mask);
+ gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+ return len_mask;
+}
+
/* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF.
If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b8a7160..6729595 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
ncopies = vect_get_num_copies (loop_vinfo, vectype);
vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
/* Now build the new conditional. Pattern gimple_conds get dropped during
codegen so we must replace the original insn. */
@@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
{
if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
OPTIMIZE_FOR_SPEED))
- return false;
+ vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
-
return true;
}
@@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
stmts[i], &cond_gsi);
workset.quick_push (stmt_mask);
}
+ else if (len_loop_p)
+ for (unsigned i = 0; i < stmts.length (); i++)
+ {
+ tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
+ lens, ncopies, vectype,
+ stmts[i], i, 1);
+
+ workset.quick_push (len_mask);
+ }
else
workset.splice (stmts);
@@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
new_temp, &cond_gsi);
}
+ else if (len_loop_p)
+ new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
+ ncopies, vectype, new_temp, 0, 1);
}
gcc_assert (new_temp);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index db44d73..93bc30e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
vec_loop_lens *, unsigned int, tree,
unsigned int, unsigned int);
+extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
+ gimple_stmt_iterator *, vec_loop_lens *,
+ unsigned int, tree, tree, unsigned int,
+ unsigned int);
extern gimple_seq vect_gen_len (tree, tree, tree, tree);
extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);