author | Richard Biener <rguenther@suse.de> | 2015-06-18 09:39:13 +0000
---|---|---
committer | Richard Biener <rguenth@gcc.gnu.org> | 2015-06-18 09:39:13 +0000
commit | 91ff150410721f627691550dfe871907c6b0175e |
tree | da75024fa87089bd1e346a57d5ce2782e4667380 /gcc/tree-vect-data-refs.c |
parent | d38052e048a04a21beedcf61822f4a494be3a747 |
re PR tree-optimization/66510 (gcc.target/arm/pr53636.c FAILs after r224221)
2015-06-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/66510
* tree-vect-stmts.c (vectorizable_load): Properly compute the
number of vector loads for SLP permuted loads.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
check the stride for loop vectorization.
(vect_enhance_data_refs_alignment): Deal with SLP adjusted
vectorization factor.
(vect_analyze_group_access): If the group size is not a power
of two, require an epilogue loop.
* tree-vect-loop.c (vect_analyze_loop_2): Move alignment
computation and optimization, and alias test pruning, after the
final vectorization factor computation.
* tree-vect-slp.c (vect_build_slp_tree_1): Remove check on
vector alignment.
(vect_transform_slp_perm_load): Properly compute the original
number of vector load stmts.
* gcc.dg/vect/slp-perm-12.c: New testcase (sketched below).
From-SVN: r224598
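The new testcase is not reproduced on this page. As a rough illustration (a hypothetical sketch, not the actual contents of gcc.dg/vect/slp-perm-12.c), a loop of the following shape exercises both fixed paths: the loads are consumed in permuted order by an SLP group, and the group size of 3 is not a power of two, so the vectorizer must plan for an epilogue loop.

```c
/* Hypothetical sketch of the kind of loop such a testcase exercises;
   not the actual gcc.dg/vect/slp-perm-12.c.  */
void
foo (int *__restrict out, const int *__restrict in, int n)
{
  for (int i = 0; i < n; i++)
    {
      /* Grouped loads of size 3, consumed in rotated (permuted) order
         by a grouped store.  3 is not a power of two, so the trailing
         iterations must be peeled into an epilogue loop.  */
      out[3 * i + 0] = in[3 * i + 1];
      out[3 * i + 1] = in[3 * i + 2];
      out[3 * i + 2] = in[3 * i + 0];
    }
}
```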
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r-- | gcc/tree-vect-data-refs.c | 35 |
1 file changed, 23 insertions(+), 12 deletions(-)
```diff
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 3fc1226..b626e38 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -691,21 +691,22 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 	}
     }
 
-  /* Similarly, if we're doing basic-block vectorization, we can only use
-     base and misalignment information relative to an innermost loop if the
-     misalignment stays the same throughout the execution of the loop.
-     As above, this is the case if the stride of the dataref evenly divides
-     by the vector size.  */
-  if (!loop)
+  /* Similarly we can only use base and misalignment information relative to
+     an innermost loop if the misalignment stays the same throughout the
+     execution of the loop.  As above, this is the case if the stride of
+     the dataref evenly divides by the vector size.  */
+  else
     {
       tree step = DR_STEP (dr);
+      unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
       if (tree_fits_shwi_p (step)
-	  && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+	  && ((tree_to_shwi (step) * vf)
+	      % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "SLP: step doesn't divide the vector-size.\n");
+			     "step doesn't divide the vector-size.\n");
 	  misalign = NULL_TREE;
 	}
     }
@@ -1440,7 +1441,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
              We do this automtically for cost model, since we calculate cost
              for every peeling option.  */
           if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
-            possible_npeel_number = vf /nelements;
+	    {
+	      if (STMT_SLP_TYPE (stmt_info))
+		possible_npeel_number
+		  = (vf * GROUP_SIZE (stmt_info)) / nelements;
+	      else
+		possible_npeel_number = vf / nelements;
+	    }
 
           /* Handle the aligned case. We may decide to align some other
              access, making DR unaligned.  */
@@ -1453,7 +1460,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
               for (j = 0; j < possible_npeel_number; j++)
                 {
-                  gcc_assert (npeel_tmp <= vf);
                   vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                   npeel_tmp += nelements;
                 }
@@ -2230,8 +2236,13 @@ vect_analyze_group_access (struct data_reference *dr)
 	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
 	}
 
-      /* There is a gap in the end of the group.  */
-      if (groupsize - last_accessed_element > 0 && loop_vinfo)
+      /* If there is a gap in the end of the group or the group size cannot
+	 be made a multiple of the vector element count then we access excess
+	 elements in the last iteration and thus need to peel that off.  */
+      if (loop_vinfo
+	  && (groupsize - last_accessed_element > 0
+	      || exact_log2 (groupsize) == -1))
+	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
```
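A worked example of the reworked stride check may help. Before the patch, the check step % vector_size == 0 applied only to basic-block vectorization; the patch extends it to loops by scaling the step with the vectorization factor, since a loop dataref advances step * vf bytes per vector iteration. The following standalone sketch (hypothetical names, not GCC internals) shows the arithmetic:

```c
#include <stdio.h>

/* Sketch of the invariance condition checked by the patch (hypothetical
   helper, not a GCC function): the misalignment of a data reference is
   the same in every vector iteration iff the bytes advanced per vector
   iteration, step * vf, are a multiple of the vector size.  For
   basic-block vectorization vf is taken as 1, as in the patch.  */
static int
misalign_invariant (long step_bytes, unsigned vf, unsigned vecsize_bytes)
{
  return (step_bytes * vf) % vecsize_bytes == 0;
}

int
main (void)
{
  /* 16-byte vectors (e.g. V4SI).  */
  printf ("step 12, vf 1: %d\n", misalign_invariant (12, 1, 16)); /* 0 */
  printf ("step 12, vf 4: %d\n", misalign_invariant (12, 4, 16)); /* 1 */
  printf ("step  8, vf 2: %d\n", misalign_invariant (8, 2, 16));  /* 1 */
  return 0;
}
```

So a 12-byte step (a group of three ints) is rejected for basic-block vectorization, but with vf = 4 the per-iteration advance is 48 bytes and the misalignment stays constant. This is also why the dump message loses its "SLP:" prefix in the patch: the check is no longer SLP-specific.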