aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKewen Lin <linkw@linux.ibm.com>2020-04-02 08:48:03 -0500
committerKewen Lin <linkw@linux.ibm.com>2020-04-02 08:54:11 -0500
commit81ce375d1fdd99f9d93b00f4895eab74c3d8b54a (patch)
treeacc2712194440216d6a4a94c1d4c92ce520ae591
parent68cbee9bf53332721ae33c517ca46039368c0a95 (diff)
downloadgcc-81ce375d1fdd99f9d93b00f4895eab74c3d8b54a.zip
gcc-81ce375d1fdd99f9d93b00f4895eab74c3d8b54a.tar.gz
gcc-81ce375d1fdd99f9d93b00f4895eab74c3d8b54a.tar.bz2
Fix PR94401 by considering reverse overrun
The commit r10-7415 brings scalar type consideration to eliminate epilogue peeling for gaps, but it exposed one problem: the current handling doesn't consider the memory access type VMAT_CONTIGUOUS_REVERSE, for which the overrun happens on the low-address side. This patch makes the code take care of it by updating the offset and the constructor element order accordingly. Bootstrapped/regtested on powerpc64le-linux-gnu P8 and aarch64-linux-gnu. 2020-04-02 Kewen Lin <linkw@gcc.gnu.org> gcc/ChangeLog PR tree-optimization/94401 * tree-vect-stmts.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE access type when loading halves of vector to avoid peeling for gaps.
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/tree-vect-stmts.c38
2 files changed, 35 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2cec367..f0a9509 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-02 Kewen Lin <linkw@gcc.gnu.org>
+
+ PR tree-optimization/94401
+ * tree-vect-loop.c (vectorizable_load): Handle VMAT_CONTIGUOUS_REVERSE
+ access type when loading halves of vector to avoid peeling for gaps.
+
2020-04-02 Jakub Jelinek <jakub@redhat.com>
* config/mips/mti-linux.h (SYSROOT_SUFFIX_SPEC): Add a space in
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 46bc2bd..7f3a9fb 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9602,11 +9602,20 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (new_vtype != NULL_TREE)
ltype = half_vtype;
}
+ tree offset
+ = (dataref_offset ? dataref_offset
+ : build_int_cst (ref_type, 0));
+ if (ltype != vectype
+ && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ unsigned HOST_WIDE_INT gap
+ = DR_GROUP_GAP (first_stmt_info);
+ gap *= tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
+ tree gapcst = build_int_cst (ref_type, gap);
+ offset = size_binop (PLUS_EXPR, offset, gapcst);
+ }
data_ref
- = fold_build2 (MEM_REF, ltype, dataref_ptr,
- dataref_offset
- ? dataref_offset
- : build_int_cst (ref_type, 0));
+ = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
if (alignment_support_scheme == dr_aligned)
;
else if (DR_MISALIGNMENT (first_dr_info) == -1)
@@ -9619,16 +9628,27 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
TYPE_ALIGN (elem_type));
if (ltype != vectype)
{
- vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
+ vect_copy_ref_info (data_ref,
+ DR_REF (first_dr_info->dr));
tree tem = make_ssa_name (ltype);
new_stmt = gimple_build_assign (tem, data_ref);
- vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vect_finish_stmt_generation (stmt_info, new_stmt,
+ gsi);
data_ref = NULL;
vec<constructor_elt, va_gc> *v;
vec_alloc (v, 2);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
- CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
- build_zero_cst (ltype));
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+ {
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ }
+ else
+ {
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+ build_zero_cst (ltype));
+ }
gcc_assert (new_vtype != NULL_TREE);
if (new_vtype == vectype)
new_stmt = gimple_build_assign (