path: root/gcc/tree-vect-data-refs.c
author     Richard Biener <rguenther@suse.de>    2015-06-18 09:39:13 +0000
committer  Richard Biener <rguenth@gcc.gnu.org>  2015-06-18 09:39:13 +0000
commit     91ff150410721f627691550dfe871907c6b0175e (patch)
tree       da75024fa87089bd1e346a57d5ce2782e4667380 /gcc/tree-vect-data-refs.c
parent     d38052e048a04a21beedcf61822f4a494be3a747 (diff)
re PR tree-optimization/66510 (gcc.target/arm/pr53636.c FAILs after r224221)
2015-06-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66510
	* tree-vect-stmts.c (vectorizable_load): Properly compute the
	number of vector loads for SLP permuted loads.
	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
	check the stride for loop vectorization.
	(vect_enhance_data_refs_alignment): Deal with SLP adjusted
	vectorization factor.
	(vect_analyze_group_access): If the group size is not a power of
	two, require an epilogue loop.
	* tree-vect-loop.c (vect_analyze_loop_2): Move alignment compute
	and optimizing and alias test pruning after final vectorization
	factor computation.
	* tree-vect-slp.c (vect_build_slp_tree_1): Remove check on vector
	alignment.
	(vect_transform_slp_perm_load): Properly compute the original
	number of vector load stmts.

	* gcc.dg/vect/slp-perm-12.c: New testcase.

From-SVN: r224598
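For orientation, here is a minimal sketch of the kind of loop the fix is concerned with. It is an illustrative guess only, not the contents of the actual gcc.dg/vect/slp-perm-12.c testcase: the loads form a group of size 3 (not a power of two) and feed the stores in a permuted order, so SLP vectorization has to get the number of permuted vector loads right and plan for an epilogue loop covering the excess elements.

/* Illustrative sketch only -- not the actual slp-perm-12.c testcase.
   Group size 3 (not a power of two), loads permuted into the stores.  */
void
foo (int *__restrict out, int *__restrict in, int n)
{
  for (int i = 0; i < n; i++)
    {
      int a = in[3 * i];
      int b = in[3 * i + 1];
      int c = in[3 * i + 2];
      /* Store back in a permuted order relative to the loads.  */
      out[3 * i] = c;
      out[3 * i + 1] = a;
      out[3 * i + 2] = b;
    }
}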
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r--  gcc/tree-vect-data-refs.c  |  35
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 3fc1226..b626e38 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -691,21 +691,22 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
}
}
- /* Similarly, if we're doing basic-block vectorization, we can only use
- base and misalignment information relative to an innermost loop if the
- misalignment stays the same throughout the execution of the loop.
- As above, this is the case if the stride of the dataref evenly divides
- by the vector size. */
- if (!loop)
+ /* Similarly we can only use base and misalignment information relative to
+ an innermost loop if the misalignment stays the same throughout the
+ execution of the loop. As above, this is the case if the stride of
+ the dataref evenly divides by the vector size. */
+ else
{
tree step = DR_STEP (dr);
+ unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
if (tree_fits_shwi_p (step)
- && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+ && ((tree_to_shwi (step) * vf)
+ % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP: step doesn't divide the vector-size.\n");
+ "step doesn't divide the vector-size.\n");
misalign = NULL_TREE;
}
}
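As a worked illustration of the adjusted check in the hunk above (the numbers and the helper function are assumptions for the example, not part of GCC or of this patch): the misalignment of a data reference stays the same across vector iterations only when step * vf is a multiple of the vector size in bytes, so with 16-byte vectors a step of 4 bytes is fine for a loop vectorization factor of 4 (4 * 4 = 16) but not for the basic-block case where vf is 1 (4 % 16 != 0).

/* Minimal sketch of the condition, with assumed example values.  */
static int
misalignment_is_invariant (long step, unsigned vf, unsigned vector_size)
{
  /* True if advancing by step bytes vf times preserves the misalignment
     modulo the vector size.  */
  return (step * (long) vf) % vector_size == 0;
}
/* misalignment_is_invariant (4, 4, 16) == 1: loop case, misalignment kept.
   misalignment_is_invariant (4, 1, 16) == 0: basic-block case, dropped.  */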
@@ -1440,7 +1441,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
We do this automatically for cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- possible_npeel_number = vf /nelements;
+ {
+ if (STMT_SLP_TYPE (stmt_info))
+ possible_npeel_number
+ = (vf * GROUP_SIZE (stmt_info)) / nelements;
+ else
+ possible_npeel_number = vf / nelements;
+ }
/* Handle the aligned case. We may decide to align some other
access, making DR unaligned. */
@@ -1453,7 +1460,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
for (j = 0; j < possible_npeel_number; j++)
{
- gcc_assert (npeel_tmp <= vf);
vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
npeel_tmp += nelements;
}
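Worked numbers for the SLP branch above, assumed purely for illustration: with vf = 4, a GROUP_SIZE of 3 and nelements = 4, possible_npeel_number becomes (4 * 3) / 4 = 3 candidate peeling amounts, where the non-SLP formula 4 / 4 would try only one. Since npeel_tmp advances by nelements for each candidate, it can now exceed vf, which is why the gcc_assert in the hunk above is removed.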
@@ -2230,8 +2236,13 @@ vect_analyze_group_access (struct data_reference *dr)
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
- /* There is a gap in the end of the group. */
- if (groupsize - last_accessed_element > 0 && loop_vinfo)
+ /* If there is a gap in the end of the group or the group size cannot
+ be made a multiple of the vector element count then we access excess
+ elements in the last iteration and thus need to peel that off. */
+ if (loop_vinfo
+ && (groupsize - last_accessed_element > 0
+ || exact_log2 (groupsize) == -1))
+
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,