aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2021-07-26 10:22:23 +0100
committerTamar Christina <tamar.christina@arm.com>2021-07-26 10:22:23 +0100
commit2050ac1a547eebe7de4af98b57429a934e75fff4 (patch)
tree48bd4189de691773b62c86b47c95d12ad313b0a4 /gcc
parentacf9d1fd806fabf62dfe232439b11263c191e32d (diff)
downloadgcc-2050ac1a547eebe7de4af98b57429a934e75fff4.zip
gcc-2050ac1a547eebe7de4af98b57429a934e75fff4.tar.gz
gcc-2050ac1a547eebe7de4af98b57429a934e75fff4.tar.bz2
AArch64: correct usdot vectorizer and intrinsics optabs
There's a slight mismatch between the vectorizer optabs and the intrinsics patterns for NEON. The vectorizer expects operands[3] and operands[0] to be the same but the aarch64 intrinsics expanders expect operands[0] and operands[1] to be the same. This means we need different patterns here. This adds a separate usdot vectorizer pattern which just shuffles around the RTL params. There's also an inconsistency between the usdot and (u|s)dot intrinsics RTL patterns which is not corrected here. gcc/ChangeLog: * config/aarch64/aarch64-builtins.c (TYPES_TERNOP_SUSS, aarch64_types_ternop_suss_qualifiers): New. * config/aarch64/aarch64-simd-builtins.def (usdot_prod): Use it. * config/aarch64/aarch64-simd.md (usdot_prod<vsi2qi>): Re-organize RTL. * config/aarch64/arm_neon.h (vusdot_s32, vusdotq_s32): Use it.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-builtins.c4
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def2
-rw-r--r--gcc/config/aarch64/aarch64-simd.md28
-rw-r--r--gcc/config/aarch64/arm_neon.h4
4 files changed, 21 insertions, 17 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 9ed4b72..f6b41d9 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -209,6 +209,10 @@ static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
+#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
static enum aarch64_type_qualifiers
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b7f1237..3bb45a8 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -377,7 +377,7 @@
/* Implemented by <sur><dotprod>_prod<dot_mode>. */
BUILTIN_VB (TERNOP, sdot, 0, NONE)
BUILTIN_VB (TERNOPU, udot, 0, NONE)
- BUILTIN_VB (TERNOP_SSUS, usdot_prod, 10, NONE)
+ BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE)
/* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>. */
BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7332a73..bf667b9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -599,20 +599,6 @@
[(set_attr "type" "neon_dot<q>")]
)
-;; These instructions map to the __builtins for the armv8.6a I8MM usdot
-;; (vector) Dot Product operation.
-(define_insn "usdot_prod<vsi2qi>"
- [(set (match_operand:VS 0 "register_operand" "=w")
- (plus:VS
- (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
- (match_operand:<VSI2QI> 3 "register_operand" "w")]
- UNSPEC_USDOT)
- (match_operand:VS 1 "register_operand" "0")))]
- "TARGET_I8MM"
- "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
- [(set_attr "type" "neon_dot<q>")]
-)
-
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
@@ -648,6 +634,20 @@
DONE;
})
+;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
+;; (vector) Dot Product operation and the vectorized optab.
+(define_insn "usdot_prod<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand" "=w")
+ (plus:VS
+ (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w")]
+ UNSPEC_USDOT)
+ (match_operand:VS 3 "register_operand" "0")))]
+ "TARGET_I8MM"
+ "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
+ [(set_attr "type" "neon_dot<q>")]
+)
+
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7523974..0f43994 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -33744,14 +33744,14 @@ __extension__ extern __inline int32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b)
{
- return __builtin_aarch64_usdot_prodv8qi_ssus (__r, __a, __b);
+ return __builtin_aarch64_usdot_prodv8qi_suss (__a, __b, __r);
}
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b)
{
- return __builtin_aarch64_usdot_prodv16qi_ssus (__r, __a, __b);
+ return __builtin_aarch64_usdot_prodv16qi_suss (__a, __b, __r);
}
__extension__ extern __inline int32x2_t