Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r--   gcc/tree-vect-stmts.cc   58
1 file changed, 28 insertions, 30 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 4219ad8..935d80f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2072,16 +2072,22 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
           dr_alignment_support alss;
           int misalign = dr_misalignment (first_dr_info, vectype);
           tree half_vtype;
+          poly_uint64 remain;
+          unsigned HOST_WIDE_INT tem, num;
           if (overrun_p
               && !masked_p
               && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
                                                            vectype, misalign)))
                    == dr_aligned
                   || alss == dr_unaligned_supported)
-              && known_eq (nunits, (group_size - gap) * 2)
-              && known_eq (nunits, group_size)
-              && (vector_vector_composition_type (vectype, 2, &half_vtype)
-                  != NULL_TREE))
+              && can_div_trunc_p (group_size
+                                  * LOOP_VINFO_VECT_FACTOR (loop_vinfo) - gap,
+                                  nunits, &tem, &remain)
+              && (known_eq (remain, 0u)
+                  || (constant_multiple_p (nunits, remain, &num)
+                      && (vector_vector_composition_type (vectype, num,
+                                                           &half_vtype)
+                          != NULL_TREE))))
             overrun_p = false;
 
           if (overrun_p && !can_overrun_p)
@@ -11513,33 +11519,14 @@ vectorizable_load (vec_info *vinfo,
                     unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
                     unsigned int vect_align
                       = vect_known_alignment_in_bytes (first_dr_info, vectype);
-                    unsigned int scalar_dr_size
-                      = vect_get_scalar_dr_size (first_dr_info);
-                    /* If there's no peeling for gaps but we have a gap
-                       with slp loads then load the lower half of the
-                       vector only.  See get_group_load_store_type for
-                       when we apply this optimization.  */
-                    if (slp
-                        && loop_vinfo
-                        && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
-                        && known_eq (nunits, (group_size - gap) * 2)
-                        && known_eq (nunits, group_size)
-                        && gap >= (vect_align / scalar_dr_size))
-                      {
-                        tree half_vtype;
-                        new_vtype
-                          = vector_vector_composition_type (vectype, 2,
-                                                            &half_vtype);
-                        if (new_vtype != NULL_TREE)
-                          ltype = half_vtype;
-                      }
                     /* Try to use a single smaller load when we are about
                        to load excess elements compared to the unrolled
-                       scalar loop.
-                       ???  This should cover the above case as well.  */
-                    else if (known_gt ((vec_num * j + i + 1) * nunits,
+                       scalar loop.  */
+                    if (known_gt ((vec_num * j + i + 1) * nunits,
                                        (group_size * vf - gap)))
                       {
+                        poly_uint64 remain = ((group_size * vf - gap)
+                                              - (vec_num * j + i) * nunits);
                         if (known_ge ((vec_num * j + i + 1) * nunits
                                       - (group_size * vf - gap), nunits))
                           /* DR will be unused.  */
@@ -11551,11 +11538,15 @@ vectorizable_load (vec_info *vinfo,
                              at least one element is accessed in the
                              scalar loop.  */
                           ;
+                        else if (known_gt (vect_align,
+                                           ((nunits - remain)
+                                            * vect_get_scalar_dr_size
+                                                (first_dr_info))))
+                          /* Aligned access to the gap area when there's
+                             at least one element in it is OK.  */
+                          ;
                         else
                           {
-                            auto remain
-                              = ((group_size * vf - gap)
-                                 - (vec_num * j + i) * nunits);
                             /* remain should now be > 0 and < nunits.  */
                             unsigned num;
                             if (constant_multiple_p (nunits, remain, &num))
@@ -11569,6 +11560,13 @@ vectorizable_load (vec_info *vinfo,
                                   ltype = ptype;
                               }
                             /* Else use multiple loads or a masked load?  */
+                            /* For loop vectorization we now should have
+                               an alternate type or LOOP_VINFO_PEELING_FOR_GAPS
+                               set.  */
+                            if (loop_vinfo)
+                              gcc_assert (new_vtype
+                                          || LOOP_VINFO_PEELING_FOR_GAPS
+                                               (loop_vinfo));
                           }
                       }
                     tree offset
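
The new get_group_load_store_type condition above computes, for the last vector of the loop, how many scalar elements are still needed (the remainder of group_size * VF - gap divided by nunits) and, when that remainder is non-zero, whether a full vector can be composed from nunits / remain equal pieces so that only the first piece has to be loaded. The standalone C++ sketch below mirrors that arithmetic for constant sizes only; the helper name gap_load_ok and its plain unsigned parameters are hypothetical and are not part of GCC, which works on poly_uint64 values via can_div_trunc_p and constant_multiple_p.

// Hypothetical illustration of the arithmetic in the new condition;
// not GCC code.
#include <cstdio>

static bool
gap_load_ok (unsigned group_size, unsigned vf, unsigned gap,
             unsigned nunits, unsigned *pieces)
{
  /* Scalar elements the vectorized loop actually reads.  */
  unsigned total = group_size * vf - gap;
  /* Elements still needed from the last, partially used vector.  */
  unsigned remain = total % nunits;
  if (remain == 0)
    {
      /* The last vector is fully used; the gap is never touched.  */
      *pieces = 0;
      return true;
    }
  if (nunits % remain != 0)
    /* The vector cannot be composed from equally sized pieces.  */
    return false;
  /* Load only one piece of remain elements; the vector would be
     composed from this many pieces.  */
  *pieces = nunits / remain;
  return true;
}

int
main ()
{
  unsigned pieces;
  /* Group of 4 with a trailing gap of 2, VF 4, 4-element vectors:
     14 elements are needed, remain = 2, pieces = 2, i.e. only the
     low half of the final vector has to be loaded.  This is the
     "half vector" case the old condition handled; the new one also
     accepts other piece counts.  */
  if (gap_load_ok (4, 4, 2, 4, &pieces))
    std::printf ("composable from %u pieces\n", pieces);
  return 0;
}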