diff options
author | Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> | 2022-06-12 08:50:16 +0530 |
---|---|---|
committer | Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> | 2022-06-12 08:55:04 +0530 |
commit | 494bec025002df422f2faa947138bf3643d80b54 (patch) | |
tree | e6c5ba8e07f688100d879290d8b2f3ad929e34e4 /gcc/tree-cfg.cc | |
parent | cbd842717ec5cab989141bf1575846c2acef818d (diff) | |
download | gcc-494bec025002df422f2faa947138bf3643d80b54.zip gcc-494bec025002df422f2faa947138bf3643d80b54.tar.gz gcc-494bec025002df422f2faa947138bf3643d80b54.tar.bz2 |
PR96463: Optimise svld1rq from vectors for little endian AArch64 targets.
The patch folds:
lhs = svld1rq({-1, -1, ...}, rhs)
into:
tmp = mem_ref<vectype> [(elem_type * {ref-all}) rhs]
lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3 ...}>.
which is then expanded using aarch64_expand_sve_dupq.
Example:
svint32_t
foo (int32x4_t x)
{
return svld1rq (svptrue_b8 (), &x[0]);
}
code-gen:
foo:
.LFB4350:
dup z0.q, z0.q[0]
ret
The patch relaxes type-checking for VEC_PERM_EXPR by allowing different
vector types for lhs and rhs, provided that:
(1) rhs3 is constant and has an integer element type.
(2) len(lhs) == len(rhs3) and len(rhs1) == len(rhs2).
(3) lhs, rhs1 and rhs2 have the same element type.
gcc/ChangeLog:
PR target/96463
* config/aarch64/aarch64-sve-builtins-base.cc: Include ssa.h.
(svld1rq_impl::fold): Define.
* config/aarch64/aarch64.cc (expand_vec_perm_d): Define new members
op_mode and op_vec_flags.
(aarch64_evpc_reencode): Initialize newd.op_mode and
newd.op_vec_flags.
(aarch64_evpc_sve_dup): New function.
(aarch64_expand_vec_perm_const_1): Gate existing calls to
aarch64_evpc_* functions under d->vmode == d->op_mode,
and call aarch64_evpc_sve_dup.
(aarch64_vectorize_vec_perm_const): Remove assert
d->vmode != d->op_mode, and initialize d.op_mode and d.op_vec_flags.
* tree-cfg.cc (verify_gimple_assign_ternary): Allow different
vector types for lhs and rhs in VEC_PERM_EXPR if rhs3 is
constant.
gcc/testsuite/ChangeLog:
PR target/96463
* gcc.target/aarch64/sve/acle/general/pr96463-1.c: New test.
* gcc.target/aarch64/sve/acle/general/pr96463-2.c: Likewise.
Diffstat (limited to 'gcc/tree-cfg.cc')
-rw-r--r-- | gcc/tree-cfg.cc | 40 |
1 file changed, 27 insertions, 13 deletions
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 8de1b14..9e5d84a 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -4297,18 +4297,14 @@ verify_gimple_assign_ternary (gassign *stmt) break; case VEC_PERM_EXPR: - if (!useless_type_conversion_p (lhs_type, rhs1_type) - || !useless_type_conversion_p (lhs_type, rhs2_type)) - { - error ("type mismatch in %qs", code_name); - debug_generic_expr (lhs_type); - debug_generic_expr (rhs1_type); - debug_generic_expr (rhs2_type); - debug_generic_expr (rhs3_type); - return true; - } + /* If permute is constant, then we allow for lhs and rhs + to have different vector types, provided: + (1) lhs, rhs1, rhs2 have same element type. + (2) rhs3 vector is constant and has integer element type. + (3) len(lhs) == len(rhs3) && len(rhs1) == len(rhs2). */ - if (TREE_CODE (rhs1_type) != VECTOR_TYPE + if (TREE_CODE (lhs_type) != VECTOR_TYPE + || TREE_CODE (rhs1_type) != VECTOR_TYPE || TREE_CODE (rhs2_type) != VECTOR_TYPE || TREE_CODE (rhs3_type) != VECTOR_TYPE) { @@ -4320,10 +4316,28 @@ verify_gimple_assign_ternary (gassign *stmt) return true; } + /* If rhs3 is constant, we allow lhs, rhs1 and rhs2 to be different vector types, + as long as lhs, rhs1 and rhs2 have same element type. */ + if (TREE_CONSTANT (rhs3) + ? (!useless_type_conversion_p (TREE_TYPE (lhs_type), TREE_TYPE (rhs1_type)) + || !useless_type_conversion_p (TREE_TYPE (lhs_type), TREE_TYPE (rhs2_type))) + : (!useless_type_conversion_p (lhs_type, rhs1_type) + || !useless_type_conversion_p (lhs_type, rhs2_type))) + { + error ("type mismatch in %qs", code_name); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + debug_generic_expr (rhs2_type); + debug_generic_expr (rhs3_type); + return true; + } + + /* If rhs3 is constant, relax the check len(rhs2) == len(rhs3). 
*/ if (maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type), TYPE_VECTOR_SUBPARTS (rhs2_type)) - || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type), - TYPE_VECTOR_SUBPARTS (rhs3_type)) + || (!TREE_CONSTANT(rhs3) + && maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type), + TYPE_VECTOR_SUBPARTS (rhs3_type))) || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs3_type), TYPE_VECTOR_SUBPARTS (lhs_type))) { |