aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorVictor Do Nascimento <victor.donascimento@arm.com>2024-07-05 15:18:32 +0100
committerVictor Do Nascimento <victor.donascimento@arm.com>2024-09-30 15:59:43 +0100
commit8398ef96cc503cffb1447c5b02741e24423ec120 (patch)
tree9d208c944a8ed582e229a25c9804925b036e496e /gcc
parentfd35d99914051c9c58b91b167f4802c8db460038 (diff)
downloadgcc-8398ef96cc503cffb1447c5b02741e24423ec120.zip
gcc-8398ef96cc503cffb1447c5b02741e24423ec120.tar.gz
gcc-8398ef96cc503cffb1447c5b02741e24423ec120.tar.bz2
autovectorizer: Test autovectorization of different dot-prod modes.
Given the novel treatment of the dot product optab as a conversion, we are now able to target different relationships between output modes and input modes. This is made clearer by way of example. Previously, on AArch64, the following loop was vectorizable: uint32_t udot4(int n, uint8_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } while the following was not: uint32_t udot2(int n, uint16_t* data) { uint32_t sum = 0; for (int i=0; i<n; i+=1) sum += data[i] * data[i]; return sum; } Under the new treatment of the dot product optab, they are both now vectorizable. This adds the relevant target-agnostic check to ensure this behavior in the autovectorizer, gated behind the new check_effective_target `vect_dotprod_hisi' as well as a runtime check targeting aarch64. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_dotprod_hisi): New. * gcc.dg/vect/vect-dotprod-conv-optab.c: Likewise. * gcc.target/aarch64/vect-dotprod-twoway-hisi.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c41
-rw-r--r--gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c66
-rw-r--r--gcc/testsuite/lib/target-supports.exp9
3 files changed, 116 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c
new file mode 100644
index 0000000..63e6c95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_dotprod_hisi } */
+/* Ensure that, given the same output datatype, both the two-way and four-way
+ dot products are autovectorized, with the correct operation then selected
+ based on the distinct input types. */
+#include <stdint.h>
+
+uint32_t udot4(int n, uint8_t* data) { /* Returns the sum of data[i] * data[i] for i in [0, n): 8-bit unsigned inputs, 32-bit unsigned accumulator. */
+ uint32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+int32_t sdot4(int n, int8_t* data) { /* Returns the sum of data[i] * data[i] for i in [0, n): 8-bit signed inputs, 32-bit signed accumulator. */
+ int32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+uint32_t udot2(int n, uint16_t* data) { /* Returns the sum of data[i] * data[i] for i in [0, n): 16-bit unsigned inputs, 32-bit unsigned accumulator. */
+ uint32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+int32_t sdot2(int n, int16_t* data) { /* Returns the sum of data[i] * data[i] for i in [0, n): 16-bit signed inputs, 32-bit signed accumulator. */
+ int32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c
new file mode 100644
index 0000000..0490faa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_dotprod_hisi } */
+/* { dg-options "-static -O3 -ftree-vectorize -fdump-tree-vect-details -save-temps" } */
+/* Ensure runtime correctness in the autovectorized two-way dot product operations. */
+
+#include <stdint.h>
+#include <stdlib.h>
+#pragma GCC target "+sme2"
+
+uint32_t
+udot2 (int n, uint16_t* data) __arm_streaming
+{ /* Returns the sum of data[i] * data[i] for i in [0, n): 16-bit unsigned inputs, 32-bit unsigned accumulator. Declared __arm_streaming; this file enables "+sme2" via #pragma GCC target. */
+ uint32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+int32_t
+sdot2 (int n, int16_t* data) __arm_streaming
+{ /* Returns the sum of data[i] * data[i] for i in [0, n): 16-bit signed inputs, 32-bit signed accumulator. Declared __arm_streaming; this file enables "+sme2" via #pragma GCC target. */
+ int32_t sum = 0;
+ for (int i=0; i<n; i+=1) {
+ sum += data[i] * data[i]; /* Both operands are promoted to int before the multiply. */
+ }
+ return sum;
+}
+
+int
+main ()
+{ /* Runs udot2 and sdot2 on constant 4-element arrays and calls abort () if any result differs from its expected value; returns 0 otherwise. */
+
+ uint16_t u_input_nil[] = { [0 ... 3] = 0 }; /* GNU range designator: elements 0 through 3 all get the same value. */
+ uint16_t u_input_min[] = { [0 ... 3] = 1 };
+ uint16_t u_input_max[] = { [0 ... 3] = 32767};
+
+ uint32_t u_nil_dotprod = udot2 (4, u_input_nil);
+ uint32_t u_min_dotprod = udot2 (4, u_input_min);
+ uint32_t u_max_dotprod = udot2 (4, u_input_max);
+
+ if (u_nil_dotprod != 0
+ || u_min_dotprod != 4
+ || u_max_dotprod != 4294705156) /* 4 * 32767 * 32767, which still fits in uint32_t. */
+ abort ();
+
+ int16_t s_input_nil[] = { [0 ... 3] = 0 };
+ int16_t s_input_min[] = { [0 ... 3] = -23170 };
+ int16_t s_input_max[] = { [0 ... 3] = 23170 };
+
+ int32_t s_nil_dotprod = sdot2 (4, s_input_nil);
+ int32_t s_min_dotprod = sdot2 (4, s_input_min);
+ int32_t s_max_dotprod = sdot2 (4, s_input_max);
+
+ if (s_nil_dotprod != 0
+ || s_min_dotprod != 2147395600 /* 4 * 23170 * 23170, which still fits in int32_t; squaring makes the sign irrelevant. */
+ || s_max_dotprod != 2147395600)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 46 "vect" } } */
+/* { dg-final { scan-assembler "\[ \t\]udot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */
+/* { dg-final { scan-assembler "\[ \t\]sdot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 05a63c4..f92f7f1 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4294,6 +4294,15 @@ proc check_effective_target_vect_int_div { } {
return [check_effective_target_aarch64_sve]
}
+# Return 1 if the target supports two-way dot products on inputs of hi mode
+# producing si outputs, 0 otherwise.
+
+proc check_effective_target_vect_dotprod_hisi { } {
+ return [check_cached_effective_target_indexed aarch64_sme2 {
+ expr { [check_effective_target_aarch64_sme2]
+ }}]
+}
+
# Return 1 if the target supports vectorization of early breaks,
# 0 otherwise.
#