about | summary | refs | log | tree | commit | diff
path: root/gcc/tree-cfg.cc
diff options
context:
space:
mode:
author: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> 2022-06-12 08:50:16 +0530
committer: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> 2022-06-12 08:55:04 +0530
commit: 494bec025002df422f2faa947138bf3643d80b54 (patch)
tree: e6c5ba8e07f688100d879290d8b2f3ad929e34e4 /gcc/tree-cfg.cc
parent: cbd842717ec5cab989141bf1575846c2acef818d (diff)
download: gcc-494bec025002df422f2faa947138bf3643d80b54.zip
gcc-494bec025002df422f2faa947138bf3643d80b54.tar.gz
gcc-494bec025002df422f2faa947138bf3643d80b54.tar.bz2
PR96463: Optimise svld1rq from vectors for little endian AArch64 targets.

The patch folds:
  lhs = svld1rq({-1, -1, ...}, rhs)
into:
  tmp = mem_ref<vectype> [(elem_type * {ref-all}) rhs]
  lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3 ...}>.
which is then expanded using aarch64_expand_sve_dupq.

Example:
  svint32_t
  foo (int32x4_t x)
  {
    return svld1rq (svptrue_b8 (), &x[0]);
  }

code-gen:
  foo:
  .LFB4350:
          dup     z0.q, z0.q[0]
          ret

The patch relaxes type-checking for VEC_PERM_EXPR by allowing different
vector types for lhs and rhs provided:
(1) rhs3 is constant and has integer type element.
(2) len(lhs) == len(rhs3) and len(rhs1) == len(rhs2)
(3) lhs and rhs have same element type.

gcc/ChangeLog:
  PR target/96463
  * config/aarch64/aarch64-sve-builtins-base.cc: Include ssa.h.
  (svld1rq_impl::fold): Define.
  * config/aarch64/aarch64.cc (expand_vec_perm_d): Define new members
  op_mode and op_vec_flags.
  (aarch64_evpc_reencode): Initialize newd.op_mode and
  newd.op_vec_flags.
  (aarch64_evpc_sve_dup): New function.
  (aarch64_expand_vec_perm_const_1): Gate existing calls to
  aarch64_evpc_* functions under d->vmode == d->op_mode, and call
  aarch64_evpc_sve_dup.
  (aarch64_vectorize_vec_perm_const): Remove assert
  d->vmode != d->op_mode, and initialize d.op_mode and d.op_vec_flags.
  * tree-cfg.cc (verify_gimple_assign_ternary): Allow different vector
  types for lhs and rhs in VEC_PERM_EXPR if rhs3 is constant.

gcc/testsuite/ChangeLog:
  PR target/96463
  * gcc.target/aarch64/sve/acle/general/pr96463-1.c: New test.
  * gcc.target/aarch64/sve/acle/general/pr96463-2.c: Likewise.
Diffstat (limited to 'gcc/tree-cfg.cc')
-rw-r--r-- gcc/tree-cfg.cc 40
1 files changed, 27 insertions, 13 deletions
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index 8de1b14..9e5d84a 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -4297,18 +4297,14 @@ verify_gimple_assign_ternary (gassign *stmt)
break;
case VEC_PERM_EXPR:
- if (!useless_type_conversion_p (lhs_type, rhs1_type)
- || !useless_type_conversion_p (lhs_type, rhs2_type))
- {
- error ("type mismatch in %qs", code_name);
- debug_generic_expr (lhs_type);
- debug_generic_expr (rhs1_type);
- debug_generic_expr (rhs2_type);
- debug_generic_expr (rhs3_type);
- return true;
- }
+ /* If permute is constant, then we allow for lhs and rhs
+ to have different vector types, provided:
+ (1) lhs, rhs1, rhs2 have same element type.
+ (2) rhs3 vector is constant and has integer element type.
+ (3) len(lhs) == len(rhs3) && len(rhs1) == len(rhs2). */
- if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ if (TREE_CODE (lhs_type) != VECTOR_TYPE
+ || TREE_CODE (rhs1_type) != VECTOR_TYPE
|| TREE_CODE (rhs2_type) != VECTOR_TYPE
|| TREE_CODE (rhs3_type) != VECTOR_TYPE)
{
@@ -4320,10 +4316,28 @@ verify_gimple_assign_ternary (gassign *stmt)
return true;
}
+ /* If rhs3 is constant, we allow lhs, rhs1 and rhs2 to be different vector types,
+ as long as lhs, rhs1 and rhs2 have same element type. */
+ if (TREE_CONSTANT (rhs3)
+ ? (!useless_type_conversion_p (TREE_TYPE (lhs_type), TREE_TYPE (rhs1_type))
+ || !useless_type_conversion_p (TREE_TYPE (lhs_type), TREE_TYPE (rhs2_type)))
+ : (!useless_type_conversion_p (lhs_type, rhs1_type)
+ || !useless_type_conversion_p (lhs_type, rhs2_type)))
+ {
+ error ("type mismatch in %qs", code_name);
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ /* If rhs3 is constant, relax the check len(rhs2) == len(rhs3). */
if (maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type),
TYPE_VECTOR_SUBPARTS (rhs2_type))
- || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type),
- TYPE_VECTOR_SUBPARTS (rhs3_type))
+ || (!TREE_CONSTANT(rhs3)
+ && maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type),
+ TYPE_VECTOR_SUBPARTS (rhs3_type)))
|| maybe_ne (TYPE_VECTOR_SUBPARTS (rhs3_type),
TYPE_VECTOR_SUBPARTS (lhs_type)))
{