diff options
author | Pan Li <pan2.li@intel.com> | 2024-05-16 09:58:13 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2024-05-16 20:25:31 +0800 |
commit | 57f8a2f67c1536be23231808ab00613ab69193ed (patch) | |
tree | f6e763ad94328a1947d62c63e97741acc6c59732 /gcc | |
parent | d4dee347b3fe1982bab26485ff31cd039c9df010 (diff) | |
download | gcc-57f8a2f67c1536be23231808ab00613ab69193ed.zip gcc-57f8a2f67c1536be23231808ab00613ab69193ed.tar.gz gcc-57f8a2f67c1536be23231808ab00613ab69193ed.tar.bz2 |
Vect: Support loop len in vectorizable early exit
This patch adds early break auto-vectorization support for targets which
use length for partial vectorization. Consider the following example:
unsigned vect_a[802];
unsigned vect_b[802];
void test (unsigned x, int n)
{
for (int i = 0; i < n; i++)
{
vect_b[i] = x + i;
if (vect_a[i] > x)
break;
vect_a[i] = x;
}
}
We use VCOND_MASK_LEN to simulate the generation of (mask && i < len + bias).
And then the IR of RVV looks like below:
...
_87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
_55 = (int) _87;
...
mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
if (vec_len_mask_72 != { 0, ... })
goto <bb 6>; [5.50%]
else
goto <bb 7>; [94.50%]
The tests below passed for this patch:
1. The riscv full regression tests.
2. The x86 bootstrap tests.
3. The x86 full regression tests.
gcc/ChangeLog:
* tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
the loop len mask.
* tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
vect_gen_loop_len_mask for 1 or more stmt(s).
* tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
for vect_gen_loop_len_mask.
Signed-off-by: Pan Li <pan2.li@intel.com>
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 27 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.cc | 17 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 4 |
3 files changed, 46 insertions, 2 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 361aec0..83c0544 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, return loop_len; } +/* Generate the tree for the loop len mask and return it. Given the lens, + nvectors, vectype, index and factor to gen the len mask as below. + + tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias) +*/ +tree +vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, + gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens, + unsigned int nvectors, tree vectype, tree stmt, + unsigned int index, unsigned int factor) +{ + tree all_one_mask = build_all_ones_cst (vectype); + tree all_zero_mask = build_zero_cst (vectype); + tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index, + factor); + tree bias = build_int_cst (intQI_type_node, + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo)); + tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask"); + gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt, + all_one_mask, all_zero_mask, len, + bias); + gimple_call_set_lhs (call, len_mask); + gsi_insert_before (cond_gsi, call, GSI_SAME_STMT); + + return len_mask; +} + /* Scale profiling counters by estimation for LOOP which is vectorized by factor VF. 
If FLAT is true, the loop we started with had unrealistically flat diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b8a7160..6729595 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, ncopies = vect_get_num_copies (loop_vinfo, vectype); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); /* Now build the new conditional. Pattern gimple_conds get dropped during codegen so we must replace the original insn. */ @@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, { if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, OPTIMIZE_FOR_SPEED)) - return false; + vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); else vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); } - return true; } @@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, stmts[i], &cond_gsi); workset.quick_push (stmt_mask); } + else if (len_loop_p) + for (unsigned i = 0; i < stmts.length (); i++) + { + tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, + lens, ncopies, vectype, + stmts[i], i, 1); + + workset.quick_push (len_mask); + } else workset.splice (stmts); @@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, new_temp, &cond_gsi); } + else if (len_loop_p) + new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens, + ncopies, vectype, new_temp, 0, 1); } gcc_assert (new_temp); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index db44d73..93bc30e 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2408,6 +2408,10 @@ extern void 
vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int, extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *, vec_loop_lens *, unsigned int, tree, unsigned int, unsigned int); +extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *, + gimple_stmt_iterator *, vec_loop_lens *, + unsigned int, tree, tree, unsigned int, + unsigned int); extern gimple_seq vect_gen_len (tree, tree, tree, tree); extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info); extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *); |