Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--  gcc/tree-vect-stmts.cc  58
1 file changed, 28 insertions(+), 30 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4219ad8..935d80f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2072,16 +2072,22 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
dr_alignment_support alss;
int misalign = dr_misalignment (first_dr_info, vectype);
tree half_vtype;
+ poly_uint64 remain;
+ unsigned HOST_WIDE_INT tem, num;
if (overrun_p
&& !masked_p
&& (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
vectype, misalign)))
== dr_aligned
|| alss == dr_unaligned_supported)
- && known_eq (nunits, (group_size - gap) * 2)
- && known_eq (nunits, group_size)
- && (vector_vector_composition_type (vectype, 2, &half_vtype)
- != NULL_TREE))
+ && can_div_trunc_p (group_size
+ * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
+ nunits, &tem, &remain)
+ && (known_eq (remain, 0u)
+ || (constant_multiple_p (nunits, remain, &num)
+ && (vector_vector_composition_type (vectype, num,
+ &half_vtype)
+ != NULL_TREE))))
	overrun_p = false;

      if (overrun_p && !can_overrun_p)
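
The new test generalizes the old half-vector special case: it divides the
group_size * VF - gap scalar elements that are actually accessed by nunits
and, if the division leaves a remainder, requires that the remainder evenly
divide nunits and that a matching vector composition type exist. A minimal
sketch of that arithmetic, with plain unsigned integers standing in for
poly_uint64 and for can_div_trunc_p / constant_multiple_p, and made-up
constant values (illustration only, not GCC internals):

  #include <cassert>

  int
  main ()
  {
    /* Hypothetical numbers: 4-lane vectors, a group of 4 with a gap of 2,
       vectorization factor 2.  */
    unsigned group_size = 4, vf = 2, gap = 2, nunits = 4;
    unsigned total = group_size * vf - gap;   /* 6 scalar elements accessed.  */
    unsigned quot = total / nunits;           /* 1 full vector.  */
    unsigned remain = total % nunits;         /* 2 elements left over.  */
    /* remain is non-zero but divides nunits, so the tail can be built from
       'num' smaller pieces (two half vectors here) instead of overrunning
       the group.  */
    unsigned num = nunits / remain;
    assert (quot == 1 && remain == 2 && num == 2);
    return 0;
  }
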
@@ -11513,33 +11519,14 @@ vectorizable_load (vec_info *vinfo,
unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
unsigned int vect_align
= vect_known_alignment_in_bytes (first_dr_info, vectype);
- unsigned int scalar_dr_size
- = vect_get_scalar_dr_size (first_dr_info);
- /* If there's no peeling for gaps but we have a gap
- with slp loads then load the lower half of the
- vector only. See get_group_load_store_type for
- when we apply this optimization. */
- if (slp
- && loop_vinfo
- && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
- && known_eq (nunits, (group_size - gap) * 2)
- && known_eq (nunits, group_size)
- && gap >= (vect_align / scalar_dr_size))
- {
- tree half_vtype;
- new_vtype
- = vector_vector_composition_type (vectype, 2,
- &half_vtype);
- if (new_vtype != NULL_TREE)
- ltype = half_vtype;
- }
/* Try to use a single smaller load when we are about
to load excess elements compared to the unrolled
- scalar loop.
- ??? This should cover the above case as well. */
- else if (known_gt ((vec_num * j + i + 1) * nunits,
+ scalar loop. */
+ if (known_gt ((vec_num * j + i + 1) * nunits,
(group_size * vf - gap)))
{
+ poly_uint64 remain = ((group_size * vf - gap)
+ - (vec_num * j + i) * nunits);
if (known_ge ((vec_num * j + i + 1) * nunits
- (group_size * vf - gap), nunits))
/* DR will be unused. */
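
In vectorizable_load the dedicated half-vector path is dropped and the
general "single smaller load" test takes over. That test fires when the
current vector load would step past the group_size * vf - gap scalar
elements the scalar loop reads, and remain is how many elements the final
load still has to cover. A made-up constant-sized illustration of that
bookkeeping (not GCC code, values are hypothetical):

  #include <cassert>

  int
  main ()
  {
    /* Hypothetical numbers: 4-lane vectors, a group of 3 with a gap of 1,
       vectorization factor 2, two vectors per iteration (vec_num == 2);
       look at the second vector of the first iteration (j == 0, i == 1).  */
    unsigned nunits = 4, group_size = 3, gap = 1, vf = 2;
    unsigned vec_num = 2, j = 0, i = 1;
    unsigned scalar_elems = group_size * vf - gap;                 /* 5 */
    bool excess = (vec_num * j + i + 1) * nunits > scalar_elems;   /* 8 > 5 */
    unsigned remain = scalar_elems - (vec_num * j + i) * nunits;   /* 1 */
    /* A full 4-lane load would read 3 elements past what the scalar loop
       touches; only 'remain' element is still needed, so a smaller piece
       (or one of the fallbacks below) must be used.  */
    assert (excess && remain == 1);
    return 0;
  }
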
@@ -11551,11 +11538,15 @@ vectorizable_load (vec_info *vinfo,
at least one element is accessed in the
scalar loop. */
;
+ else if (known_gt (vect_align,
+ ((nunits - remain)
+ * vect_get_scalar_dr_size
+ (first_dr_info))))
+ /* Aligned access to the gap area when there's
+ at least one element in it is OK. */
+ ;
else
{
- auto remain
- = ((group_size * vf - gap)
- - (vec_num * j + i) * nunits);
/* remain should now be > 0 and < nunits. */
unsigned num;
if (constant_multiple_p (nunits, remain, &num))
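
The new early-out above accepts a full-width load into the gap when the
overrun, (nunits - remain) * scalar_dr_size bytes, is smaller than the known
alignment of the access, since the excess then stays inside the same aligned
block as the last needed element. Continuing the made-up numbers from the
sketch above (illustration only, values are hypothetical):

  #include <cassert>

  int
  main ()
  {
    /* Hypothetical numbers: 4-byte elements, the access is known to be
       16-byte aligned, and only 1 of the 4 lanes of the last load is
       still needed.  */
    unsigned nunits = 4, remain = 1, scalar_dr_size = 4, vect_align = 16;
    unsigned excess_bytes = (nunits - remain) * scalar_dr_size;   /* 12 */
    /* The 12 excess bytes stay within the 16-byte aligned block that also
       holds the last needed element, so in this example the full-vector
       load cannot fault and no smaller load is required.  */
    assert (vect_align > excess_bytes);
    return 0;
  }
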
@@ -11569,6 +11560,13 @@ vectorizable_load (vec_info *vinfo,
ltype = ptype;
}
/* Else use multiple loads or a masked load? */
+ /* For loop vectorization we now should have
+ an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
+ set. */
+ if (loop_vinfo)
+ gcc_assert (new_vtype
+ || LOOP_VINFO_PEELING_FOR_GAPS
+ (loop_vinfo));
}
}
tree offset