diff options
author | Tamar Christina <tamar.christina@arm.com> | 2021-07-26 10:22:23 +0100 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2021-07-26 10:22:23 +0100 |
commit | 2050ac1a547eebe7de4af98b57429a934e75fff4 (patch) | |
tree | 48bd4189de691773b62c86b47c95d12ad313b0a4 /gcc | |
parent | acf9d1fd806fabf62dfe232439b11263c191e32d (diff) | |
download | gcc-2050ac1a547eebe7de4af98b57429a934e75fff4.zip gcc-2050ac1a547eebe7de4af98b57429a934e75fff4.tar.gz gcc-2050ac1a547eebe7de4af98b57429a934e75fff4.tar.bz2 |
AArch64: correct usdot vectorizer and intrinsics optabs
There's a slight mismatch between the vectorizer optabs and the intrinsics
patterns for NEON. The vectorizer expects operands[3] and operands[0] to be
the same but the aarch64 intrinsics expanders expect operands[0] and
operands[1] to be the same.
This means we need different patterns here. This adds a separate usdot
vectorizer pattern which just shuffles around the RTL params.
There's also an inconsistency between the usdot and (u|s)dot intrinsics RTL
patterns which is not corrected here.
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.c (TYPES_TERNOP_SUSS,
aarch64_types_ternop_suss_qualifiers): New.
* config/aarch64/aarch64-simd-builtins.def (usdot_prod): Use it.
* config/aarch64/aarch64-simd.md (usdot_prod<vsi2qi>): Re-organize RTL.
* config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Use it.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.c | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 28 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 4 |
4 files changed, 21 insertions, 17 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 9ed4b72..f6b41d9 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -209,6 +209,10 @@ static enum aarch64_type_qualifiers aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none }; +#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers) static enum aarch64_type_qualifiers diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index b7f1237..3bb45a8 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -377,7 +377,7 @@ /* Implemented by <sur><dotprod>_prod<dot_mode>. */ BUILTIN_VB (TERNOP, sdot, 0, NONE) BUILTIN_VB (TERNOPU, udot, 0, NONE) - BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE) + BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE) /* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>. */ BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE) BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7332a73..bf667b9 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -599,20 +599,6 @@ [(set_attr "type" "neon_dot<q>")] ) -;; These instructions map to the __builtins for the armv8.6a I8MM usdot -;; (vector) Dot Product operation. -(define_insn "usdot_prod<vsi2qi>" - [(set (match_operand:VS 0 "register_operand" "=w") - (plus:VS - (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w") - (match_operand:<VSI2QI> 3 "register_operand" "w")] - UNSPEC_USDOT) - (match_operand:VS 1 "register_operand" "0")))] - "TARGET_I8MM" - "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>" - [(set_attr "type" "neon_dot<q>")] -) - ;; These expands map to the Dot Product optab the vectorizer checks for. ;; The auto-vectorizer expects a dot product builtin that also does an ;; accumulation into the provided register. @@ -648,6 +634,20 @@ DONE; }) +;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot +;; (vector) Dot Product operation and the vectorized optab. +(define_insn "usdot_prod<vsi2qi>" + [(set (match_operand:VS 0 "register_operand" "=w") + (plus:VS + (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w") + (match_operand:<VSI2QI> 2 "register_operand" "w")] + UNSPEC_USDOT) + (match_operand:VS 3 "register_operand" "0")))] + "TARGET_I8MM" + "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>" + [(set_attr "type" "neon_dot<q>")] +) + ;; These instructions map to the __builtins for the Dot Product ;; indexed operations. (define_insn "aarch64_<sur>dot_lane<vsi2qi>" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 7523974..0f43994 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -33744,14 +33744,14 @@ __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) { - return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv8qi_suss (__a, __b, __r); } __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) { - return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b); + return __builtin_aarch64_usdot_prodv16qi_suss (__a, __b, __r); } __extension__ extern __inline int32x2_t |