aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-data-refs.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-data-refs.cc')
-rw-r--r--gcc/tree-vect-data-refs.cc934
1 files changed, 66 insertions, 868 deletions
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c84cd29..3bf2852 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3956,8 +3956,7 @@ vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info)
}
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
int misalignment;
- if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists ()
- && ((misalignment = dr_misalignment (dr_info, vectype)), true)
+ if (((misalignment = dr_misalignment (dr_info, vectype)), true)
&& (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment)
== dr_explicit_realign_optimized))
{
@@ -4539,10 +4538,14 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
info->ifn = gimple_call_internal_fn (call);
info->decl = NULL_TREE;
info->base = gimple_call_arg (call, 0);
- info->offset = gimple_call_arg (call, 1);
+ info->alias_ptr = gimple_call_arg
+ (call, internal_fn_alias_ptr_index (info->ifn));
+ info->offset = gimple_call_arg
+ (call, internal_fn_offset_index (info->ifn));
info->offset_dt = vect_unknown_def_type;
info->offset_vectype = NULL_TREE;
- info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+ info->scale = TREE_INT_CST_LOW (gimple_call_arg
+ (call, internal_fn_scale_index (info->ifn)));
info->element_type = TREE_TYPE (vectype);
info->memory_type = TREE_TYPE (DR_REF (dr));
}
@@ -4667,26 +4670,19 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
if (off == NULL_TREE)
off = size_zero_node;
- /* If base is not loop invariant, either off is 0, then we start with just
- the constant offset in the loop invariant BASE and continue with base
- as OFF, otherwise give up.
- We could handle that case by gimplifying the addition of base + off
- into some SSA_NAME and use that as off, but for now punt. */
+ /* BASE must be loop invariant. If it is not invariant, but OFF is, then we
+ can fix that by swapping BASE and OFF. */
if (!expr_invariant_in_loop_p (loop, base))
{
- if (!integer_zerop (off))
+ if (!expr_invariant_in_loop_p (loop, off))
return false;
- off = base;
- base = size_int (pbytepos);
- }
- /* Otherwise put base + constant offset into the loop invariant BASE
- and continue with OFF. */
- else
- {
- base = fold_convert (sizetype, base);
- base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+
+ std::swap (base, off);
}
+ base = fold_convert (sizetype, base);
+ base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+
/* OFF at this point may be either a SSA_NAME or some tree expression
from get_inner_reference. Try to peel off loop invariants from it
into BASE as long as possible. */
@@ -4864,9 +4860,17 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
offset_vectype = NULL_TREE;
}
+ gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
+ gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));
+
info->ifn = ifn;
info->decl = decl;
info->base = base;
+
+ info->alias_ptr = build_int_cst
+ (reference_alias_ptr_type (DR_REF (dr)),
+ get_object_alignment (DR_REF (dr)));
+
info->offset = off;
info->offset_dt = vect_unknown_def_type;
info->offset_vectype = offset_vectype;
@@ -5056,7 +5060,7 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
*/
opt_result
-vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
+vect_analyze_data_refs (vec_info *vinfo, bool *fatal)
{
class loop *loop = NULL;
unsigned int i;
@@ -5075,7 +5079,6 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
- poly_uint64 vf;
gcc_assert (DR_REF (dr));
stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
@@ -5267,11 +5270,6 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
stmt_info->stmt, vectype);
}
- /* Adjust the minimal vectorization factor according to the
- vector type. */
- vf = TYPE_VECTOR_SUBPARTS (vectype);
- *min_vf = upper_bound (*min_vf, vf);
-
/* Leave the BB vectorizer to pick the vector type later, based on
the final dataref group size and SLP node size. */
if (is_a <loop_vec_info> (vinfo))
@@ -5728,8 +5726,7 @@ vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
create_iv (aggr_ptr_init, PLUS_EXPR,
- fold_convert (aggr_ptr_type, iv_step),
- aggr_ptr, loop, &incr_gsi, insert_after,
+ iv_step, aggr_ptr, loop, &incr_gsi, insert_after,
&indx_before_incr, &indx_after_incr);
incr = gsi_stmt (incr_gsi);
@@ -5757,7 +5754,7 @@ vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info,
{
standard_iv_increment_position (containing_loop, &incr_gsi,
&insert_after);
- create_iv (aptr, PLUS_EXPR, fold_convert (aggr_ptr_type, DR_STEP (dr)),
+ create_iv (aptr, PLUS_EXPR, DR_STEP (dr),
aggr_ptr, containing_loop, &incr_gsi, insert_after,
&indx_before_incr, &indx_after_incr);
incr = gsi_stmt (incr_gsi);
@@ -6080,204 +6077,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
}
-/* Function vect_permute_store_chain.
-
- Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
- a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
- the data correctly for the stores. Return the final references for stores
- in RESULT_CHAIN.
-
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
- The input is 4 vectors each containing 8 elements. We assign a number to
- each element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
- 4th vec: 24 25 26 27 28 29 30 31
-
- The output sequence should be:
-
- 1st vec: 0 8 16 24 1 9 17 25
- 2nd vec: 2 10 18 26 3 11 19 27
- 3rd vec: 4 12 20 28 5 13 21 29
- 4th vec: 6 14 22 30 7 15 23 31
-
- i.e., we interleave the contents of the four vectors in their order.
-
- We use interleave_high/low instructions to create such output. The input of
- each interleave_high/low operation is two vectors:
- 1st vec 2nd vec
- 0 1 2 3 4 5 6 7
- the even elements of the result vector are obtained left-to-right from the
- high/low elements of the first vector. The odd elements of the result are
- obtained left-to-right from the high/low elements of the second vector.
- The output of interleave_high will be: 0 4 1 5
- and of interleave_low: 2 6 3 7
-
-
- The permutation is done in log LENGTH stages. In each stage interleave_high
- and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
- where the first argument is taken from the first half of DR_CHAIN and the
- second argument from its second half.
- In our example,
-
- I1: interleave_high (1st vec, 3rd vec)
- I2: interleave_low (1st vec, 3rd vec)
- I3: interleave_high (2nd vec, 4th vec)
- I4: interleave_low (2nd vec, 4th vec)
-
- The output for the first stage is:
-
- I1: 0 16 1 17 2 18 3 19
- I2: 4 20 5 21 6 22 7 23
- I3: 8 24 9 25 10 26 11 27
- I4: 12 28 13 29 14 30 15 31
-
- The output of the second stage, i.e. the final result is:
-
- I1: 0 8 16 24 1 9 17 25
- I2: 2 10 18 26 3 11 19 27
- I3: 4 12 20 28 5 13 21 29
- I4: 6 14 22 30 7 15 23 31. */
-
-void
-vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
- unsigned int length,
- stmt_vec_info stmt_info,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree vect1, vect2, high, low;
- gimple *perm_stmt;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- tree perm_mask_low, perm_mask_high;
- tree data_ref;
- tree perm3_mask_low, perm3_mask_high;
- unsigned int i, j, n, log_length = exact_log2 (length);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (length == 3)
- {
- /* vect_grouped_store_supported ensures that this is constant. */
- unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
- unsigned int j0 = 0, j1 = 0, j2 = 0;
-
- vec_perm_builder sel (nelt, nelt, 1);
- sel.quick_grow (nelt);
- vec_perm_indices indices;
- for (j = 0; j < 3; j++)
- {
- int nelt0 = ((3 - j) * nelt) % 3;
- int nelt1 = ((3 - j) * nelt + 1) % 3;
- int nelt2 = ((3 - j) * nelt + 2) % 3;
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = j0++;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = nelt + j1++;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = 0;
- }
- indices.new_vector (sel, 2, nelt);
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < nelt; i++)
- {
- if (3 * i + nelt0 < nelt)
- sel[3 * i + nelt0] = 3 * i + nelt0;
- if (3 * i + nelt1 < nelt)
- sel[3 * i + nelt1] = 3 * i + nelt1;
- if (3 * i + nelt2 < nelt)
- sel[3 * i + nelt2] = nelt + j2++;
- }
- indices.new_vector (sel, 2, nelt);
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
- vect1 = dr_chain[0];
- vect2 = dr_chain[1];
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {j, nelt, *, j + 1, nelt + j + 1, *,
- j + 2, nelt + j + 2, *, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_low);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
- vect1 = data_ref;
- vect2 = dr_chain[2];
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {0, 1, nelt + j, 3, 4, nelt + j + 1,
- 6, 7, nelt + j + 2, ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
- vect2, perm3_mask_high);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j] = data_ref;
- }
- }
- else
- {
- /* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (pow2p_hwi (length));
-
- /* The encoding has 2 interleaved stepped patterns. */
- poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
- vec_perm_builder sel (nelt, 2, 3);
- sel.quick_grow (6);
- for (i = 0; i < 3; i++)
- {
- sel[i * 2] = i;
- sel[i * 2 + 1] = i + nelt;
- }
- vec_perm_indices indices (sel, 2, nelt);
- perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < 6; i++)
- sel[i] += exact_div (nelt, 2);
- indices.new_vector (sel, 2, nelt);
- perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0, n = log_length; i < n; i++)
- {
- for (j = 0; j < length/2; j++)
- {
- vect1 = dr_chain[j];
- vect2 = dr_chain[j+length/2];
-
- /* Create interleaving stmt:
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
- ...}> */
- high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
- perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_high);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[2*j] = high;
-
- /* Create interleaving stmt:
- low = VEC_PERM_EXPR <vect1, vect2,
- {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
- ...}> */
- low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
- perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
- vect2, perm_mask_low);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[2*j+1] = low;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- }
-}
-
/* Function vect_setup_realignment
This function is called when vectorizing an unaligned load using
@@ -6708,633 +6507,6 @@ vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
return IFN_LAST;
}
-/* Function vect_permute_load_chain.
-
- Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
- a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
- the input data correctly. Return the final references for loads in
- RESULT_CHAIN.
-
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
- The input is 4 vectors each containing 8 elements. We assign a number to each
- element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
- 4th vec: 24 25 26 27 28 29 30 31
-
- The output sequence should be:
-
- 1st vec: 0 4 8 12 16 20 24 28
- 2nd vec: 1 5 9 13 17 21 25 29
- 3rd vec: 2 6 10 14 18 22 26 30
- 4th vec: 3 7 11 15 19 23 27 31
-
- i.e., the first output vector should contain the first elements of each
- interleaving group, etc.
-
- We use extract_even/odd instructions to create such output. The input of
- each extract_even/odd operation is two vectors
- 1st vec 2nd vec
- 0 1 2 3 4 5 6 7
-
- and the output is the vector of extracted even/odd elements. The output of
- extract_even will be: 0 2 4 6
- and of extract_odd: 1 3 5 7
-
-
- The permutation is done in log LENGTH stages. In each stage extract_even
- and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
- their order. In our example,
-
- E1: extract_even (1st vec, 2nd vec)
- E2: extract_odd (1st vec, 2nd vec)
- E3: extract_even (3rd vec, 4th vec)
- E4: extract_odd (3rd vec, 4th vec)
-
- The output for the first stage will be:
-
- E1: 0 2 4 6 8 10 12 14
- E2: 1 3 5 7 9 11 13 15
- E3: 16 18 20 22 24 26 28 30
- E4: 17 19 21 23 25 27 29 31
-
- In order to proceed and create the correct sequence for the next stage (or
- for the correct output, if the second stage is the last one, as in our
- example), we first put the output of extract_even operation and then the
- output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
- The input for the second stage is:
-
- 1st vec (E1): 0 2 4 6 8 10 12 14
- 2nd vec (E3): 16 18 20 22 24 26 28 30
- 3rd vec (E2): 1 3 5 7 9 11 13 15
- 4th vec (E4): 17 19 21 23 25 27 29 31
-
- The output of the second stage:
-
- E1: 0 4 8 12 16 20 24 28
- E2: 2 6 10 14 18 22 26 30
- E3: 1 5 9 13 17 21 25 29
- E4: 3 7 11 15 19 23 27 31
-
- And RESULT_CHAIN after reordering:
-
- 1st vec (E1): 0 4 8 12 16 20 24 28
- 2nd vec (E3): 1 5 9 13 17 21 25 29
- 3rd vec (E2): 2 6 10 14 18 22 26 30
- 4th vec (E4): 3 7 11 15 19 23 27 31. */
-
-static void
-vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
- unsigned int length,
- stmt_vec_info stmt_info,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree data_ref, first_vect, second_vect;
- tree perm_mask_even, perm_mask_odd;
- tree perm3_mask_low, perm3_mask_high;
- gimple *perm_stmt;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- unsigned int i, j, log_length = exact_log2 (length);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (length == 3)
- {
- /* vect_grouped_load_supported ensures that this is constant. */
- unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
- unsigned int k;
-
- vec_perm_builder sel (nelt, nelt, 1);
- sel.quick_grow (nelt);
- vec_perm_indices indices;
- for (k = 0; k < 3; k++)
- {
- for (i = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = 3 * i + k;
- else
- sel[i] = 0;
- indices.new_vector (sel, 2, nelt);
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0, j = 0; i < nelt; i++)
- if (3 * i + k < 2 * nelt)
- sel[i] = i;
- else
- sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
- indices.new_vector (sel, 2, nelt);
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
- first_vect = dr_chain[0];
- second_vect = dr_chain[1];
-
- /* Create interleaving stmt (low part of):
- low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
- ...}> */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
- second_vect, perm3_mask_low);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
- /* Create interleaving stmt (high part of):
- high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
- ...}> */
- first_vect = data_ref;
- second_vect = dr_chain[2];
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
- second_vect, perm3_mask_high);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[k] = data_ref;
- }
- }
- else
- {
- /* If length is not equal to 3 then only power of 2 is supported. */
- gcc_assert (pow2p_hwi (length));
-
- /* The encoding has a single stepped pattern. */
- poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
- vec_perm_builder sel (nelt, 1, 3);
- sel.quick_grow (3);
- for (i = 0; i < 3; ++i)
- sel[i] = i * 2;
- vec_perm_indices indices (sel, 2, nelt);
- perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < 3; ++i)
- sel[i] = i * 2 + 1;
- indices.new_vector (sel, 2, nelt);
- perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < log_length; i++)
- {
- for (j = 0; j < length; j += 2)
- {
- first_vect = dr_chain[j];
- second_vect = dr_chain[j+1];
-
- /* data_ref = permute_even (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, second_vect,
- perm_mask_even);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
-
- /* data_ref = permute_odd (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, second_vect,
- perm_mask_odd);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j/2+length/2] = data_ref;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- }
-}
-
-/* Function vect_shift_permute_load_chain.
-
- Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
- sequence of stmts to reorder the input data accordingly.
- Return the final references for loads in RESULT_CHAIN.
- Return true if successful, false otherwise.
-
- E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
- The input is 3 vectors each containing 8 elements. We assign a
- number to each element, the input sequence is:
-
- 1st vec: 0 1 2 3 4 5 6 7
- 2nd vec: 8 9 10 11 12 13 14 15
- 3rd vec: 16 17 18 19 20 21 22 23
-
- The output sequence should be:
-
- 1st vec: 0 3 6 9 12 15 18 21
- 2nd vec: 1 4 7 10 13 16 19 22
- 3rd vec: 2 5 8 11 14 17 20 23
-
- We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
-
- First we shuffle all 3 vectors to get correct elements order:
-
- 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5)
- 2nd vec: ( 8 11 14) ( 9 12 15) (10 13)
- 3rd vec: (16 19 22) (17 20 23) (18 21)
-
- Next we unite and shift vector 3 times:
-
- 1st step:
- shift right by 6 the concatenation of:
- "1st vec" and "2nd vec"
- ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13)
- "2nd vec" and "3rd vec"
- ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
- "3rd vec" and "1st vec"
- (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5)
- | New vectors |
-
- So that now new vectors are:
-
- 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15)
- 2nd vec: (10 13) (16 19 22) (17 20 23)
- 3rd vec: (18 21) ( 0 3 6) ( 1 4 7)
-
- 2nd step:
- shift right by 5 the concatenation of:
- "1st vec" and "3rd vec"
- ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7)
- "2nd vec" and "1st vec"
- (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15)
- "3rd vec" and "2nd vec"
- (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23)
- | New vectors |
-
- So that now new vectors are:
-
- 1st vec: ( 9 12 15) (18 21) ( 0 3 6)
- 2nd vec: (17 20 23) ( 2 5) ( 8 11 14)
- 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY
-
- 3rd step:
- shift right by 5 the concatenation of:
- "1st vec" and "1st vec"
- ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6)
- shift right by 3 the concatenation of:
- "2nd vec" and "2nd vec"
- (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14)
- | New vectors |
-
- So that now all vectors are READY:
- 1st vec: ( 0 3 6) ( 9 12 15) (18 21)
- 2nd vec: ( 2 5) ( 8 11 14) (17 20 23)
- 3rd vec: ( 1 4 7) (10 13) (16 19 22)
-
- This algorithm is faster than one in vect_permute_load_chain if:
- 1. "shift of a concatenation" is faster than general permutation.
- This is usually so.
- 2. The TARGET machine can't execute vector instructions in parallel.
- This is because each step of the algorithm depends on previous.
- The algorithm in vect_permute_load_chain is much more parallel.
-
- The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
-*/
-
-static bool
-vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain,
- unsigned int length,
- stmt_vec_info stmt_info,
- gimple_stmt_iterator *gsi,
- vec<tree> *result_chain)
-{
- tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
- tree perm2_mask1, perm2_mask2, perm3_mask;
- tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
- gimple *perm_stmt;
-
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- machine_mode vmode = TYPE_MODE (vectype);
- unsigned int i;
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
-
- unsigned HOST_WIDE_INT nelt, vf;
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
- /* Not supported for variable-length vectors. */
- return false;
-
- vec_perm_builder sel (nelt, nelt, 1);
- sel.quick_grow (nelt);
-
- result_chain->quick_grow (length);
- memcpy (result_chain->address (), dr_chain.address (),
- length * sizeof (tree));
-
- if (pow2p_hwi (length) && vf > 4)
- {
- unsigned int j, log_length = exact_log2 (length);
- for (i = 0; i < nelt / 2; ++i)
- sel[i] = i * 2;
- for (i = 0; i < nelt / 2; ++i)
- sel[nelt / 2 + i] = i * 2 + 1;
- vec_perm_indices indices (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shuffle of 2 fields structure is not \
- supported by target\n");
- return false;
- }
- perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < nelt / 2; ++i)
- sel[i] = i * 2 + 1;
- for (i = 0; i < nelt / 2; ++i)
- sel[nelt / 2 + i] = i * 2;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shuffle of 2 fields structure is not \
- supported by target\n");
- return false;
- }
- perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {4 5 6 7 8 9 10 11}. */
- for (i = 0; i < nelt; i++)
- sel[i] = nelt / 2 + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to select vector from 2.
- For vector length 8 it is {0 1 2 3 12 13 14 15}. */
- for (i = 0; i < nelt / 2; i++)
- sel[i] = i;
- for (i = nelt / 2; i < nelt; i++)
- sel[i] = nelt + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "select is not supported by target\n");
- return false;
- }
- select_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- for (i = 0; i < log_length; i++)
- {
- for (j = 0; j < length; j += 2)
- {
- first_vect = dr_chain[j];
- second_vect = dr_chain[j + 1];
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- first_vect, first_vect,
- perm2_mask1);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- vect[0] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- second_vect, second_vect,
- perm2_mask2);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- vect[1] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[0], vect[1], shift1_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j/2 + length/2] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[0], vect[1], select_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
- }
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
- }
- return true;
- }
- if (length == 3 && vf > 2)
- {
- unsigned int k = 0, l = 0;
-
- /* Generating permutation constant to get all elements in right order.
- For vector length 8 it is {0 3 6 1 4 7 2 5}. */
- for (i = 0; i < nelt; i++)
- {
- if (3 * k + (l % 3) >= nelt)
- {
- k = 0;
- l += (3 - (nelt % 3));
- }
- sel[i] = 3 * k + (l % 3);
- k++;
- }
- vec_perm_indices indices (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shuffle of 3 fields structure is not \
- supported by target\n");
- return false;
- }
- perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {6 7 8 9 10 11 12 13}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {5 6 7 8 9 10 11 12}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + 1 + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {3 4 5 6 7 8 9 10}. */
- for (i = 0; i < nelt; i++)
- sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- /* Generating permutation constant to shift all elements.
- For vector length 8 it is {5 6 7 8 9 10 11 12}. */
- for (i = 0; i < nelt; i++)
- sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
- indices.new_vector (sel, 2, nelt);
- if (!can_vec_perm_const_p (vmode, vmode, indices))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "shift permutation is not supported by target\n");
- return false;
- }
- shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- dr_chain[k], dr_chain[k],
- perm3_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- vect[k] = data_ref;
- }
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect[k % 3], vect[(k + 1) % 3],
- shift1_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- vect_shift[k] = data_ref;
- }
-
- for (k = 0; k < 3; k++)
- {
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
- vect_shift[(4 - k) % 3],
- vect_shift[(3 - k) % 3],
- shift2_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- vect[k] = data_ref;
- }
-
- (*result_chain)[3 - (nelt % 3)] = vect[2];
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
- vect[0], shift3_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[nelt % 3] = data_ref;
-
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
- vect[1], shift4_mask);
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
- (*result_chain)[0] = data_ref;
- return true;
- }
- return false;
-}
-
-/* Function vect_transform_grouped_load.
-
- Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
- to perform their permutation and ascribe the result vectorized statements to
- the scalar statements.
-*/
-
-void
-vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info,
- vec<tree> dr_chain,
- int size, gimple_stmt_iterator *gsi)
-{
- machine_mode mode;
- vec<tree> result_chain = vNULL;
-
- /* DR_CHAIN contains input data-refs that are a part of the interleaving.
- RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
- vectors, that are ready for vector computation. */
- result_chain.create (size);
-
- /* If reassociation width for vector type is 2 or greater target machine can
- execute 2 or more vector instructions in parallel. Otherwise try to
- get chain for loads group using vect_shift_permute_load_chain. */
- mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
- if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
- || pow2p_hwi (size)
- || !vect_shift_permute_load_chain (vinfo, dr_chain, size, stmt_info,
- gsi, &result_chain))
- vect_permute_load_chain (vinfo, dr_chain,
- size, stmt_info, gsi, &result_chain);
- vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain);
- result_chain.release ();
-}
-
-/* RESULT_CHAIN contains the output of a group of grouped loads that were
- generated as part of the vectorization of STMT_INFO. Assign the statement
- for each vector to the associated scalar statement. */
-
-void
-vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info,
- vec<tree> result_chain)
-{
- stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- unsigned int i, gap_count;
- tree tmp_data_ref;
-
- /* Put a permuted data-ref in the VECTORIZED_STMT field.
- Since we scan the chain starting from its first node, their order
- corresponds to the order of data-refs in RESULT_CHAIN. */
- stmt_vec_info next_stmt_info = first_stmt_info;
- gap_count = 1;
- FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
- {
- if (!next_stmt_info)
- break;
-
- /* Skip the gaps. Loads created for the gaps will be removed by dead
- code elimination pass later. No need to check for the first stmt in
- the group, since it always exists.
- DR_GROUP_GAP is the number of steps in elements from the previous
- access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
- correspond to the gaps. */
- if (next_stmt_info != first_stmt_info
- && gap_count < DR_GROUP_GAP (next_stmt_info))
- {
- gap_count++;
- continue;
- }
-
- /* ??? The following needs cleanup after the removal of
- DR_GROUP_SAME_DR_STMT. */
- if (next_stmt_info)
- {
- gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
- /* We assume that if VEC_STMT is not NULL, this is a case of multiple
- copies, and we put the new vector statement last. */
- STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (new_stmt);
-
- next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
- gap_count = 1;
- }
- }
-}
-
/* Function vect_force_dr_alignment_p.
Returns whether the alignment of a DECL can be forced to be aligned
@@ -7362,13 +6534,14 @@ vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
alignment.
If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
if it is aligned, i.e., check if it is possible to vectorize it with different
- alignment. */
+ alignment. If GS_INFO is passed we are dealing with a gather/scatter. */
enum dr_alignment_support
vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
- tree vectype, int misalignment)
+ tree vectype, int misalignment,
+ gather_scatter_info *gs_info)
{
- data_reference *dr = dr_info->dr;
+ data_reference *dr = dr_info ? dr_info->dr : nullptr;
stmt_vec_info stmt_info = dr_info->stmt;
machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -7380,14 +6553,6 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
else if (dr_safe_speculative_read_required (stmt_info))
return dr_unaligned_unsupported;
- /* For now assume all conditional loads/stores support unaligned
- access without any special code. */
- if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
- if (gimple_call_internal_p (stmt)
- && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
- || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
- return dr_unaligned_supported;
-
if (loop_vinfo)
{
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -7457,7 +6622,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
}
} */
- if (DR_IS_READ (dr))
+ if (dr && DR_IS_READ (dr))
{
if (can_implement_p (vec_realign_load_optab, mode)
&& (!targetm.vectorize.builtin_mask_for_load
@@ -7485,10 +6650,43 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
bool is_packed = false;
tree type = TREE_TYPE (DR_REF (dr));
+ bool is_gather_scatter = gs_info != nullptr;
if (misalignment == DR_MISALIGNMENT_UNKNOWN)
- is_packed = not_size_aligned (DR_REF (dr));
+ {
+ if (!is_gather_scatter || dr != nullptr)
+ is_packed = not_size_aligned (DR_REF (dr));
+ else
+ {
+ /* Gather-scatter accesses normally perform only component accesses
+ so alignment is irrelevant for them. Targets like riscv do care
+ about scalar alignment in vector accesses, though, so check scalar
+ alignment here. We determined the alias pointer as well as the
+ base alignment during pattern recognition and can re-use it here.
+
+ As we do not have an analyzed dataref we only know the alignment
+ of the reference itself and nothing about init, steps, etc.
+ For now don't try harder to determine misalignment and
+ just assume it is unknown. We consider the type packed if its
+ scalar alignment is lower than the natural alignment of a vector
+ element's type. */
+
+ gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
+ gcc_assert (dr == nullptr);
+
+ tree inner_vectype = TREE_TYPE (vectype);
+
+ unsigned HOST_WIDE_INT scalar_align
+ = tree_to_uhwi (gs_info->alias_ptr);
+ unsigned HOST_WIDE_INT inner_vectype_sz
+ = tree_to_uhwi (TYPE_SIZE (inner_vectype));
+
+ bool is_misaligned = scalar_align < inner_vectype_sz;
+ is_packed = scalar_align > 1 && is_misaligned;
+ }
+ }
if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
- is_packed))
+ is_packed,
+ is_gather_scatter))
return dr_unaligned_supported;
/* Unsupported. */