diff options
author | Victor Do Nascimento <victor.donascimento@arm.com> | 2024-07-05 15:18:32 +0100 |
---|---|---|
committer | Victor Do Nascimento <victor.donascimento@arm.com> | 2024-09-30 15:59:43 +0100 |
commit | 8398ef96cc503cffb1447c5b02741e24423ec120 (patch) | |
tree | 9d208c944a8ed582e229a25c9804925b036e496e /gcc | |
parent | fd35d99914051c9c58b91b167f4802c8db460038 (diff) | |
download | gcc-8398ef96cc503cffb1447c5b02741e24423ec120.zip gcc-8398ef96cc503cffb1447c5b02741e24423ec120.tar.gz gcc-8398ef96cc503cffb1447c5b02741e24423ec120.tar.bz2 |
autovectorizer: Test autovectorization of different dot-prod modes.
Given the novel treatment of the dot product optab as a conversion, we
are now able to target different relationships between output modes and
input modes.
This is made clearer by way of example. Previously, on AArch64, the
following loop was vectorizable:
uint32_t udot4(int n, uint8_t* data) {
uint32_t sum = 0;
for (int i=0; i<n; i+=1)
sum += data[i] * data[i];
return sum;
}
while the following was not:
uint32_t udot2(int n, uint16_t* data) {
uint32_t sum = 0;
for (int i=0; i<n; i+=1)
sum += data[i] * data[i];
return sum;
}
Under the new treatment of the dot product optab, they are both now
vectorizable.
This adds the relevant target-agnostic check to ensure this behavior
in the autovectorizer, gated behind the new check_effective_target
`vect_dotprod_hisi' as well as a runtime check targeting aarch64.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp (check_effective_target_vect_dotprod_hisi):
New.
* gcc.dg/vect/vect-dotprod-conv-optab.c: Likewise.
* gcc.target/aarch64/vect-dotprod-twoway-hisi.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c | 41 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c | 66 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 9 |
3 files changed, 116 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c new file mode 100644 index 0000000..63e6c95 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-dotprod-conv-optab.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_dotprod_hisi } */ +/* Ensure that, given the same input datatype, both the two-way and four-way + dot products are autovectorized, with the correct operation then selected + based on the distinct output types. */ +#include <stdint.h> + +uint32_t udot4(int n, uint8_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot4(int n, int8_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +uint32_t udot2(int n, uint16_t* data) { + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t sdot2(int n, int16_t* data) { + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c new file mode 100644 index 0000000..0490faa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-dotprod-twoway-hisi.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vect_dotprod_hisi } */ +/* { dg-options "-static -O3 -ftree-vectorize -fdump-tree-vect-details -save-temps" } */ +/* Ensure runtime correctness in the autovectorized two-way dot product operations. 
*/ + +#include <stdint.h> +#include <stdlib.h> +#pragma GCC target "+sme2" + +uint32_t +udot2 (int n, uint16_t* data) __arm_streaming +{ + uint32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int32_t +sdot2 (int n, int16_t* data) __arm_streaming +{ + int32_t sum = 0; + for (int i=0; i<n; i+=1) { + sum += data[i] * data[i]; + } + return sum; +} + +int +main () +{ + + uint16_t u_input_nil[] = { [0 ... 3] = 0 }; + uint16_t u_input_min[] = { [0 ... 3] = 1 }; + uint16_t u_input_max[] = { [0 ... 3] = 32767}; + + uint32_t u_nil_dotprod = udot2 (4, u_input_nil); + uint32_t u_min_dotprod = udot2 (4, u_input_min); + uint32_t u_max_dotprod = udot2 (4, u_input_max); + + if (u_nil_dotprod != 0 + || u_min_dotprod != 4 + || u_max_dotprod != 4294705156) + abort (); + + int16_t s_input_nil[] = { [0 ... 3] = 0 }; + int16_t s_input_min[] = { [0 ... 3] = -23170 }; + int16_t s_input_max[] = { [0 ... 3] = 23170 }; + + int32_t s_nil_dotprod = sdot2 (4, s_input_nil); + int32_t s_min_dotprod = sdot2 (4, s_input_min); + int32_t s_max_dotprod = sdot2 (4, s_input_max); + + if (s_nil_dotprod != 0 + || s_min_dotprod != 2147395600 + || s_max_dotprod != 2147395600) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 46 "vect" } } */ +/* { dg-final { scan-assembler "\[ \t\]udot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ +/* { dg-final { scan-assembler "\[ \t\]sdot\tz\[0-9\]+.s, z\[0-9\]+.h, z\[0-9\]+.h" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 05a63c4..f92f7f1 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4294,6 +4294,15 @@ proc check_effective_target_vect_int_div { } { return [check_effective_target_aarch64_sve] } +# Return 1 if the target supports two-way dot products on inpus of hi mode +# producing si 
outputs, 0 otherwise. + +proc check_effective_target_vect_dotprod_hisi { } { + return [check_cached_effective_target_indexed aarch64_sme2 { + expr { [check_effective_target_aarch64_sme2] + }}] +} + # Return 1 if the target supports vectorization of early breaks, # 0 otherwise. # |