aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2021-07-14 15:20:45 +0100
committerTamar Christina <tamar.christina@arm.com>2021-07-14 15:20:45 +0100
commit6412c58c781f64b60e7353e762cd5cec62a863e7 (patch)
tree99440fc19492cfd9556e536ce27f5d2e4a332332 /gcc
parent752045ed1eea0eddc48923df78999dab7f2827ba (diff)
downloadgcc-6412c58c781f64b60e7353e762cd5cec62a863e7.zip
gcc-6412c58c781f64b60e7353e762cd5cec62a863e7.tar.gz
gcc-6412c58c781f64b60e7353e762cd5cec62a863e7.tar.bz2
AArch32: Add support for sign differing dot-product usdot for NEON.
This adds optabs implementing usdot_prod. The following testcase: #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } Generates f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q10}, [r2]! vld1.8 {q9}, [r1]! vusdot.s8 q8, q9, q10 cmp r3, r2 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr instead of f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q9}, [r2]! vld1.8 {q11}, [r1]! cmp r3, r2 vmull.s8 q10, d18, d22 vmull.s8 q9, d19, d23 vaddw.s16 q8, q8, d20 vaddw.s16 q8, q8, d21 vaddw.s16 q8, q8, d18 vaddw.s16 q8, q8, d19 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr For NEON. I couldn't figure out if the MVE instruction vmlaldav.s16 could be used to emulate this. Because it would require additional widening to work I left MVE out of this patch set but perhaps someone should take a look. gcc/ChangeLog: * config/arm/neon.md (usdot_prod<vsi2qi>): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vusdot-autovec.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/neon.md12
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c38
2 files changed, 50 insertions, 0 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 64365e0..8b0a396 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2969,6 +2969,18 @@
DONE;
})
+;; Auto-vectorizer pattern for usdot
+(define_expand "usdot_prod<vsi2qi>"
+ [(set (match_operand:VCVTI 0 "register_operand")
+ (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
+ "register_operand")
+ (match_operand:<VSI2QI> 2
+ "register_operand")]
+ UNSPEC_DOT_US)
+ (match_operand:VCVTI 3 "register_operand")))]
+ "TARGET_I8MM"
+)
+
(define_expand "neon_copysignf<mode>"
[(match_operand:VCVTF 0 "register_operand")
(match_operand:VCVTF 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
new file mode 100644
index 0000000..7cc56f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */
+
+#define N 480
+#define SIGNEDNESS_1 unsigned
+#define SIGNEDNESS_2 signed
+#define SIGNEDNESS_3 signed
+#define SIGNEDNESS_4 unsigned
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a,
+ SIGNEDNESS_4 char *restrict b)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+SIGNEDNESS_1 int __attribute__ ((noipa))
+g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b,
+ SIGNEDNESS_4 char *restrict a)
+{
+ for (__INTPTR_TYPE__ i = 0; i < N; ++i)
+ {
+ int av = a[i];
+ int bv = b[i];
+ SIGNEDNESS_2 short mult = av * bv;
+ res += mult;
+ }
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */