diff options
author | Juzhe-Zhong <juzhe.zhong@rivai.ai> | 2023-07-31 10:13:57 +0800 |
---|---|---|
committer | Pan Li <pan2.li@intel.com> | 2023-07-31 10:48:29 +0800 |
commit | 92a891e869d35c940d2a7b92355af02d78c9a86e (patch) | |
tree | 68864b312b89ba74cb57b4e357952518814ab208 | |
parent | bf36656a14aa691ca674d27f26aba9de420041d7 (diff) | |
download | gcc-92a891e869d35c940d2a7b92355af02d78c9a86e.zip gcc-92a891e869d35c940d2a7b92355af02d78c9a86e.tar.gz gcc-92a891e869d35c940d2a7b92355af02d78c9a86e.tar.bz2 |
RISC-V: Enable basic VLS auto-vectorization
Consider this following case:
void
foo (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
Compile option: --param=riscv-autovec-preference=scalable -fno-builtin
Before this patch:
foo:
li a5,16
csrr a4,vlenb
vsetvli a3,zero,e8,m1,ta,ma
vmv.v.x v1,a2
bleu a5,a4,.L2
mv a5,a4
.L2:
vsetvli zero,a5,e8,m1,ta,ma
vse8.v v1,0(a0)
ret
After this patch:
foo:
vsetivli zero,16,e8,mf8,ta,ma
vmv.v.x v1,a2
vse8.v v1,0(a0)
ret
gcc/ChangeLog:
* config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
* config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec
support.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.
-rw-r--r-- | gcc/config/riscv/autovec-vls.md | 19 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-v.cc | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c | 168 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c | 153 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c | 153 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c | 137 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c | 137 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c | 122 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c | 122 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c | 2 |
13 files changed, 1034 insertions, 6 deletions
diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md index 9ece317..1a64dfd 100644 --- a/gcc/config/riscv/autovec-vls.md +++ b/gcc/config/riscv/autovec-vls.md @@ -139,3 +139,22 @@ "vmv%m1r.v\t%0,%1" [(set_attr "type" "vmov") (set_attr "mode" "<MODE>")]) + +;; ----------------------------------------------------------------- +;; ---- Duplicate Operations +;; ----------------------------------------------------------------- + +(define_insn_and_split "@vec_duplicate<mode>" + [(set (match_operand:VLS 0 "register_operand") + (vec_duplicate:VLS + (match_operand:<VEL> 1 "reg_or_int_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode), + riscv_vector::RVV_UNOP, operands); + DONE; + } +) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 4cfbcf7..0a355eb 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2475,7 +2475,6 @@ autovectorize_vector_modes (vector_modes *modes, bool) { if (autovec_use_vlmax_p ()) { - /* TODO: We will support RVV VLS auto-vectorization mode in the future. */ poly_uint64 full_size = BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul); @@ -2503,7 +2502,25 @@ autovectorize_vector_modes (vector_modes *modes, bool) modes->safe_push (mode); } } - return 0; + unsigned int flag = 0; + if (TARGET_VECTOR_VLS) + { + /* Enable VECT_COMPARE_COSTS between VLA modes VLS modes for scalable + auto-vectorization. */ + flag |= VECT_COMPARE_COSTS; + /* Push all VLSmodes according to TARGET_MIN_VLEN. */ + unsigned int i = 0; + unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8; + unsigned int size = base_size; + machine_mode mode; + while (size > 0 && get_vector_mode (QImode, size).exists (&mode)) + { + modes->safe_push (mode); + i++; + size = base_size / (1U << i); + } + } + return flag; } /* If the given VECTOR_MODE is an RVV mode, first get the largest number diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c index e68d05f..ebbe5e2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/v-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c new file mode 100644 index 0000000..1f520f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c @@ -0,0 +1,168 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} + +/* +** foo11: +** li\s+[a-x0-9]+,4096 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo11 (int8_t *in, int8_t *out, int8_t x) +{ + for (int i = 0; i < 4096; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c new file mode 100644 index 0000000..1a930d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (int16_t *in, int16_t *out, int16_t x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c new file mode 100644 index 0000000..46fb5a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c @@ -0,0 +1,153 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} + +/* +** foo10: +** li\s+[a-x0-9]+,4096 +** addi\s+[a-x0-9]+,[a-x0-9]+,-2048 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo10 (_Float16 *in, _Float16 *out, _Float16 x) +{ + for (int i = 0; i < 2048; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c new file mode 100644 index 0000000..7e46dc4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (int32_t *in, int32_t *out, int32_t x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c new file mode 100644 index 0000000..9b9327b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c @@ -0,0 +1,137 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (float *in, float *out, float x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (float *in, float *out, float x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (float *in, float *out, float x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (float *in, float *out, float x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (float *in, float *out, float x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (float *in, float *out, float x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (float *in, float *out, float x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (float *in, float *out, float x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} + +/* +** foo9: +** li\s+[a-x0-9]+,1024 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo9 (float *in, float *out, float x) +{ + for (int i = 0; i < 1024; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c new file mode 100644 index 0000000..52d5a65 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (int64_t *in, int64_t *out, int64_t x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c new file mode 100644 index 0000000..39f27ec --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c @@ -0,0 +1,122 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "def.h" + +/* +** foo1: +** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo1 (double *in, double *out, double x) +{ + for (int i = 0; i < 4; i++) + in[i] = x; +} + +/* +** foo2: +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo2 (double *in, double *out, double x) +{ + for (int i = 0; i < 8; i++) + in[i] = x; +} + +/* +** foo3: +** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo3 (double *in, double *out, double x) +{ + for (int i = 0; i < 16; i++) + in[i] = x; +} + +/* +** foo4: +** li\s+[a-x0-9]+,32 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo4 (double *in, double *out, double x) +{ + for (int i = 0; i < 32; i++) + in[i] = x; +} + +/* +** foo5: +** li\s+[a-x0-9]+,64 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo5 (double *in, double *out, double x) +{ + for (int i = 0; i < 64; i++) + in[i] = x; +} + +/* +** foo6: +** li\s+[a-x0-9]+,128 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo6 (double *in, double *out, double x) +{ + for (int i = 0; i < 128; i++) + in[i] = x; +} + +/* +** foo7: +** li\s+[a-x0-9]+,256 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo7 (double *in, double *out, double x) +{ + for (int i = 0; i < 256; i++) + in[i] = x; +} + +/* +** foo8: +** li\s+[a-x0-9]+,512 +** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au] +** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+ +** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\) +** ret +*/ +void +foo8 (double *in, double *out, double x) +{ + for (int i = 0; i < 512; i++) + in[i] = x; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c index ecfda79..345e2f9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c index 6b320ca..e13c27d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c index ae3f066..e767629 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c @@ -3,4 +3,4 @@ #include "template-1.h" -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */ |