diff options
author | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-06-06 11:09:12 +0100 |
---|---|---|
committer | Kyrylo Tkachov <kyrylo.tkachov@arm.com> | 2023-06-06 11:09:12 +0100 |
commit | 6be5d852216d36f5b0024cd581c2508c168647a6 (patch) | |
tree | dc89f1ffe1e707ebfebd71e9abec4dccf22d02c4 | |
parent | 9371640999eedb8bac3fb9d1429db8a1a905b853 (diff) | |
download | gcc-6be5d852216d36f5b0024cd581c2508c168647a6.zip gcc-6be5d852216d36f5b0024cd581c2508c168647a6.tar.gz gcc-6be5d852216d36f5b0024cd581c2508c168647a6.tar.bz2 |
aarch64: Improve representation of vpaddd intrinsics
The aarch64_addpdi pattern is redundant as the reduc_plus_scal_<mode> pattern can already generate
the required form of the ADDP instruction, and is mostly folded to GIMPLE early on so it can benefit from more optimisations.
Though it turns out that we were missing the folding for the unsigned variants.
This patch adds that and wires up the vpaddd_u64 and vpaddd_s64 intrinsics through the above pattern instead
so that we can remove a redundant pattern and get more optimisation earlier.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc (aarch64_general_gimple_fold_builtin):
Handle unsigned reduc_plus_scal_ builtins.
* config/aarch64/aarch64-simd-builtins.def (addp): Delete DImode instances.
* config/aarch64/aarch64-simd.md (aarch64_addpdi): Delete.
* config/aarch64/arm_neon.h (vpaddd_s64): Reimplement with
__builtin_aarch64_reduc_plus_scal_v2di.
(vpaddd_u64): Reimplement with __builtin_aarch64_reduc_plus_scal_v2di_uu.
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.cc | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 4 |
4 files changed, 3 insertions, 14 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index e0bb212..50c20c8 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -3049,6 +3049,7 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, switch (fcode) { BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL) + BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, NONE) new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, 1, args[0]); gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 1beaa08..94ff3f1 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -53,8 +53,6 @@ BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP) BUILTIN_VDQ_I (BINOP, addp, 0, NONE) BUILTIN_VDQ_I (BINOPU, addp, 0, NONE) - VAR1 (UNOP, addp, 0, NONE, di) - VAR1 (UNOPU, addp, 0, NONE, di) BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, NONE) BUILTIN_VDQ_BHSI (UNOP, clz, 2, NONE) BUILTIN_VS (UNOP, ctz, 2, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index dd1b084..dbd6fc6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7025,16 +7025,6 @@ [(set_attr "type" "neon_reduc_add<q>")] ) -(define_insn "aarch64_addpdi" - [(set (match_operand:DI 0 "register_operand" "=w") - (unspec:DI - [(match_operand:V2DI 1 "register_operand" "w")] - UNSPEC_ADDP))] - "TARGET_SIMD" - "addp\t%d0, %1.2d" - [(set_attr "type" "neon_reduc_add")] -) - ;; sqrt (define_expand "sqrt<mode>2" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index afe205cb..0bb9839 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -17588,14 +17588,14 @@ __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddd_s64 (int64x2_t __a) { - return __builtin_aarch64_addpdi (__a); + return __builtin_aarch64_reduc_plus_scal_v2di (__a); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vpaddd_u64 (uint64x2_t __a) { - return __builtin_aarch64_addpdi_uu (__a); + return __builtin_aarch64_reduc_plus_scal_v2di_uu (__a); } /* vqabs */ |