aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-09-22 18:07:20 +0800
committerLehua Ding <lehua.ding@rivai.ai>2023-09-22 19:15:50 +0800
commitdc607a0d861c2d6e3067c6b2b5a106d07bba1980 (patch)
tree86180ba46130f5ef564a944f72618193cde0bbbf /gcc
parent1fea14def849dd38b098b0e2d54e64801f9c1f43 (diff)
downloadgcc-dc607a0d861c2d6e3067c6b2b5a106d07bba1980.zip
gcc-dc607a0d861c2d6e3067c6b2b5a106d07bba1980.tar.gz
gcc-dc607a0d861c2d6e3067c6b2b5a106d07bba1980.tar.bz2
RISC-V: Add VLS widen binary combine patterns
Regression passed. Committed. gcc/ChangeLog: * config/riscv/vector-iterators.md: Extend VLS modes. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/def.h: Add VLS modes widening tests. * gcc.target/riscv/rvv/autovec/vls/wadd-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/wadd-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/wadd-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/wadd-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/wmul-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/wmul-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/wmul-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/wsub-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/wsub-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/wsub-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/wsub-4.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/vector-iterators.md44
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h27
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-1.c80
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-2.c38
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-3.c80
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-4.c38
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-1.c80
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-2.c38
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-3.c49
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-1.c80
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-2.c38
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-3.c80
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-4.c38
13 files changed, 710 insertions, 0 deletions
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 4aa6412..aa9bcc7 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -1074,6 +1074,28 @@
(RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
(RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
+
+ (V1SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32")
+ (V2SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32")
+ (V4SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32")
+ (V8SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32")
+ (V16SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+ (V32SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (V64SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
+ (V128SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
+ (V256SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024")
+ (V512SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048")
+ (V1024SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
+ (V1DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64")
+ (V2DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64")
+ (V4DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64")
+ (V8DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64")
+ (V16DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (V32DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
+ (V64DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512")
+ (V128DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")
])
(define_mode_iterator VWCONVERTI [
@@ -2173,6 +2195,28 @@
(V1SI "HI") (V2SI "HI") (V4SI "HI") (V8SI "HI") (V16SI "HI") (V32SI "HI") (V64SI "HI") (V128SI "HI") (V256SI "HI")
(V512SI "HI") (V1024SI "HI")
(V1DI "SI") (V2DI "SI") (V4DI "SI") (V8DI "SI") (V16DI "SI") (V32DI "SI") (V64DI "SI") (V128DI "SI") (V256DI "SI") (V512DI "SI")
+
+ (V1SF "HF")
+ (V2SF "HF")
+ (V4SF "HF")
+ (V8SF "HF")
+ (V16SF "HF")
+ (V32SF "HF")
+ (V64SF "HF")
+ (V128SF "HF")
+ (V256SF "HF")
+ (V512SF "HF")
+ (V1024SF "HF")
+ (V1DF "SF")
+ (V2DF "SF")
+ (V4DF "SF")
+ (V8DF "SF")
+ (V16DF "SF")
+ (V32DF "SF")
+ (V64DF "SF")
+ (V128DF "SF")
+ (V256DF "SF")
+ (V512DF "SF")
])
(define_mode_attr nf [
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
index 39495ef..bb5d243 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/def.h
@@ -596,3 +596,30 @@ typedef double v512df __attribute__ ((vector_size (4096)));
v[i] = cond[i] ? -(a[i] * b[i]) - c[i] : b[i]; \
return v; \
}
+
+/* Define PREFIX_TYPE_TYPE2NUM: a[k] = (TYPE2) b[k] OP (TYPE2) c[k] for NUM elements, both inputs of narrow TYPE.  */
+#define DEF_OP_WVV(PREFIX, NUM, TYPE, TYPE2, OP) \
+  void __attribute__ ((noinline, noclone)) \
+  PREFIX##_##TYPE##_##TYPE2##NUM (TYPE2 *restrict a, TYPE *restrict b, TYPE *restrict c) \
+  { \
+    for (int k = 0; k < NUM; k++) \
+      a[k] = (TYPE2) b[k] OP (TYPE2) c[k]; \
+  }
+
+/* Define PREFIX_TYPE_TYPE2NUM: a[k] = b[k] OP (TYPE2) c[k] for NUM elements; b is already wide (TYPE2), c is narrow (TYPE).  */
+#define DEF_OP_WWV(PREFIX, NUM, TYPE, TYPE2, OP) \
+  void __attribute__ ((noinline, noclone)) \
+  PREFIX##_##TYPE##_##TYPE2##NUM (TYPE2 *restrict a, TYPE2 *restrict b, TYPE *restrict c) \
+  { \
+    for (int k = 0; k < NUM; k++) \
+      a[k] = b[k] OP (TYPE2) c[k]; \
+  }
+
+/* Define PREFIX_TYPE1_TYPE2NUM: a[i] = (TYPE3) b[i] OP (TYPE3) c[i] for NUM elements, with b of TYPE1 and c of TYPE2 (mixed signedness).  */
+#define DEF_OP_WVV_SU(PREFIX, NUM, TYPE1, TYPE2, TYPE3, OP) \
+  void __attribute__ ((noinline, noclone)) \
+  PREFIX##_##TYPE1##_##TYPE2##NUM (TYPE3 *restrict a, TYPE1 *restrict b, TYPE2 *restrict c) \
+  { \
+    for (int i = 0; i < NUM; ++i) \
+      a[i] = (TYPE3) b[i] OP (TYPE3) c[i]; \
+  }
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-1.c
new file mode 100644
index 0000000..bce56b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-1.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wadd, 4, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 8, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 16, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 32, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 64, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 128, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 256, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 512, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 1024, int8_t, int16_t, +)
+DEF_OP_WVV(wadd, 2048, int8_t, int16_t, +)
+
+DEF_OP_WVV(wadd, 4, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 8, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 16, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 32, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 64, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 128, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 256, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 512, int16_t, int32_t, +)
+DEF_OP_WVV(wadd, 1024, int16_t, int32_t, +)
+
+DEF_OP_WVV(wadd, 4, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 8, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 16, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 32, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 64, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 128, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 256, int32_t, int64_t, +)
+DEF_OP_WVV(wadd, 512, int32_t, int64_t, +)
+
+DEF_OP_WVV(wadd, 4, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 8, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 16, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 32, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 64, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 128, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 256, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 512, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 1024, uint8_t, uint16_t, +)
+DEF_OP_WVV(wadd, 2048, uint8_t, uint16_t, +)
+
+DEF_OP_WVV(wadd, 4, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 8, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 16, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 32, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 64, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 128, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 256, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 512, uint16_t, uint32_t, +)
+DEF_OP_WVV(wadd, 1024, uint16_t, uint32_t, +)
+
+DEF_OP_WVV(wadd, 4, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 8, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 16, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 32, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 64, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 128, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 256, uint32_t, uint64_t, +)
+DEF_OP_WVV(wadd, 512, uint32_t, uint64_t, +)
+
+/* { dg-final { scan-assembler-times {vwadd\.vv} 27 } } */
+/* { dg-final { scan-assembler-times {vwaddu\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-2.c
new file mode 100644
index 0000000..d0b55c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wadd, 4, _Float16, float, +)
+DEF_OP_WVV(wadd, 8, _Float16, float, +)
+DEF_OP_WVV(wadd, 16, _Float16, float, +)
+DEF_OP_WVV(wadd, 32, _Float16, float, +)
+DEF_OP_WVV(wadd, 64, _Float16, float, +)
+DEF_OP_WVV(wadd, 128, _Float16, float, +)
+DEF_OP_WVV(wadd, 256, _Float16, float, +)
+DEF_OP_WVV(wadd, 512, _Float16, float, +)
+DEF_OP_WVV(wadd, 1024, _Float16, float, +)
+
+DEF_OP_WVV(wadd, 4, float, double, +)
+DEF_OP_WVV(wadd, 8, float, double, +)
+DEF_OP_WVV(wadd, 16, float, double, +)
+DEF_OP_WVV(wadd, 32, float, double, +)
+DEF_OP_WVV(wadd, 64, float, double, +)
+DEF_OP_WVV(wadd, 128, float, double, +)
+DEF_OP_WVV(wadd, 256, float, double, +)
+DEF_OP_WVV(wadd, 512, float, double, +)
+
+/* { dg-final { scan-assembler-times {vfwadd\.vv} 17 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-3.c
new file mode 100644
index 0000000..b6067c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-3.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WWV(wadd, 4, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 8, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 16, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 32, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 64, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 128, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 256, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 512, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 1024, int8_t, int16_t, +)
+DEF_OP_WWV(wadd, 2048, int8_t, int16_t, +)
+
+DEF_OP_WWV(wadd, 4, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 8, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 16, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 32, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 64, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 128, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 256, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 512, int16_t, int32_t, +)
+DEF_OP_WWV(wadd, 1024, int16_t, int32_t, +)
+
+DEF_OP_WWV(wadd, 4, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 8, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 16, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 32, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 64, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 128, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 256, int32_t, int64_t, +)
+DEF_OP_WWV(wadd, 512, int32_t, int64_t, +)
+
+DEF_OP_WWV(wadd, 4, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 8, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 16, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 32, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 64, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 128, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 256, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 512, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 1024, uint8_t, uint16_t, +)
+DEF_OP_WWV(wadd, 2048, uint8_t, uint16_t, +)
+
+DEF_OP_WWV(wadd, 4, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 8, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 16, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 32, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 64, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 128, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 256, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 512, uint16_t, uint32_t, +)
+DEF_OP_WWV(wadd, 1024, uint16_t, uint32_t, +)
+
+DEF_OP_WWV(wadd, 4, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 8, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 16, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 32, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 64, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 128, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 256, uint32_t, uint64_t, +)
+DEF_OP_WWV(wadd, 512, uint32_t, uint64_t, +)
+
+/* { dg-final { scan-assembler-times {vwadd\.wv} 27 } } */
+/* { dg-final { scan-assembler-times {vwaddu\.wv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-4.c
new file mode 100644
index 0000000..253750a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wadd-4.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WWV(wadd, 4, _Float16, float, +)
+DEF_OP_WWV(wadd, 8, _Float16, float, +)
+DEF_OP_WWV(wadd, 16, _Float16, float, +)
+DEF_OP_WWV(wadd, 32, _Float16, float, +)
+DEF_OP_WWV(wadd, 64, _Float16, float, +)
+DEF_OP_WWV(wadd, 128, _Float16, float, +)
+DEF_OP_WWV(wadd, 256, _Float16, float, +)
+DEF_OP_WWV(wadd, 512, _Float16, float, +)
+DEF_OP_WWV(wadd, 1024, _Float16, float, +)
+
+DEF_OP_WWV(wadd, 4, float, double, +)
+DEF_OP_WWV(wadd, 8, float, double, +)
+DEF_OP_WWV(wadd, 16, float, double, +)
+DEF_OP_WWV(wadd, 32, float, double, +)
+DEF_OP_WWV(wadd, 64, float, double, +)
+DEF_OP_WWV(wadd, 128, float, double, +)
+DEF_OP_WWV(wadd, 256, float, double, +)
+DEF_OP_WWV(wadd, 512, float, double, +)
+
+/* { dg-final { scan-assembler-times {vfwadd\.wv} 17 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-1.c
new file mode 100644
index 0000000..8269dfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-1.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wmul, 4, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 8, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 16, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 32, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 64, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 128, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 256, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 512, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 1024, int8_t, int16_t, *)
+DEF_OP_WVV(wmul, 2048, int8_t, int16_t, *)
+
+DEF_OP_WVV(wmul, 4, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 8, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 16, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 32, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 64, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 128, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 256, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 512, int16_t, int32_t, *)
+DEF_OP_WVV(wmul, 1024, int16_t, int32_t, *)
+
+DEF_OP_WVV(wmul, 4, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 8, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 16, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 32, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 64, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 128, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 256, int32_t, int64_t, *)
+DEF_OP_WVV(wmul, 512, int32_t, int64_t, *)
+
+DEF_OP_WVV(wmul, 4, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 8, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 16, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 32, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 64, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 128, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 256, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 512, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 1024, uint8_t, uint16_t, *)
+DEF_OP_WVV(wmul, 2048, uint8_t, uint16_t, *)
+
+DEF_OP_WVV(wmul, 4, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 8, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 16, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 32, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 64, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 128, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 256, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 512, uint16_t, uint32_t, *)
+DEF_OP_WVV(wmul, 1024, uint16_t, uint32_t, *)
+
+DEF_OP_WVV(wmul, 4, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 8, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 16, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 32, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 64, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 128, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 256, uint32_t, uint64_t, *)
+DEF_OP_WVV(wmul, 512, uint32_t, uint64_t, *)
+
+/* { dg-final { scan-assembler-times {vwmul\.vv} 27 } } */
+/* { dg-final { scan-assembler-times {vwmulu\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-2.c
new file mode 100644
index 0000000..3675388
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wmul, 4, _Float16, float, *)
+DEF_OP_WVV(wmul, 8, _Float16, float, *)
+DEF_OP_WVV(wmul, 16, _Float16, float, *)
+DEF_OP_WVV(wmul, 32, _Float16, float, *)
+DEF_OP_WVV(wmul, 64, _Float16, float, *)
+DEF_OP_WVV(wmul, 128, _Float16, float, *)
+DEF_OP_WVV(wmul, 256, _Float16, float, *)
+DEF_OP_WVV(wmul, 512, _Float16, float, *)
+DEF_OP_WVV(wmul, 1024, _Float16, float, *)
+
+DEF_OP_WVV(wmul, 4, float, double, *)
+DEF_OP_WVV(wmul, 8, float, double, *)
+DEF_OP_WVV(wmul, 16, float, double, *)
+DEF_OP_WVV(wmul, 32, float, double, *)
+DEF_OP_WVV(wmul, 64, float, double, *)
+DEF_OP_WVV(wmul, 128, float, double, *)
+DEF_OP_WVV(wmul, 256, float, double, *)
+DEF_OP_WVV(wmul, 512, float, double, *)
+
+/* { dg-final { scan-assembler-times {vfwmul\.vv} 17 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-3.c
new file mode 100644
index 0000000..813a9a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wmul-3.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV_SU(wmul, 4, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 8, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 16, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 32, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 64, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 128, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 256, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 512, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 1024, int8_t, uint8_t, int16_t, *)
+DEF_OP_WVV_SU(wmul, 2048, int8_t, uint8_t, int16_t, *)
+
+DEF_OP_WVV_SU(wmul, 4, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 8, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 16, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 32, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 64, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 128, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 256, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 512, int16_t, uint16_t, int32_t, *)
+DEF_OP_WVV_SU(wmul, 1024, int16_t, uint16_t, int32_t, *)
+
+DEF_OP_WVV_SU(wmul, 4, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 8, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 16, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 32, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 64, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 128, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 256, int32_t, uint32_t, int64_t, *)
+DEF_OP_WVV_SU(wmul, 512, int32_t, uint32_t, int64_t, *)
+
+/* { dg-final { scan-assembler-times {vwmulsu\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-1.c
new file mode 100644
index 0000000..eea9540
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-1.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wsub, 4, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 8, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 16, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 32, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 64, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 128, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 256, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 512, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 1024, int8_t, int16_t, -)
+DEF_OP_WVV(wsub, 2048, int8_t, int16_t, -)
+
+DEF_OP_WVV(wsub, 4, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 8, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 16, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 32, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 64, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 128, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 256, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 512, int16_t, int32_t, -)
+DEF_OP_WVV(wsub, 1024, int16_t, int32_t, -)
+
+DEF_OP_WVV(wsub, 4, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 8, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 16, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 32, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 64, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 128, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 256, int32_t, int64_t, -)
+DEF_OP_WVV(wsub, 512, int32_t, int64_t, -)
+
+DEF_OP_WVV(wsub, 4, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 8, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 16, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 32, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 64, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 128, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 256, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 512, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 1024, uint8_t, uint16_t, -)
+DEF_OP_WVV(wsub, 2048, uint8_t, uint16_t, -)
+
+DEF_OP_WVV(wsub, 4, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 8, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 16, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 32, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 64, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 128, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 256, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 512, uint16_t, uint32_t, -)
+DEF_OP_WVV(wsub, 1024, uint16_t, uint32_t, -)
+
+DEF_OP_WVV(wsub, 4, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 8, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 16, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 32, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 64, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 128, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 256, uint32_t, uint64_t, -)
+DEF_OP_WVV(wsub, 512, uint32_t, uint64_t, -)
+
+/* { dg-final { scan-assembler-times {vwsub\.vv} 27 } } */
+/* { dg-final { scan-assembler-times {vwsubu\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-2.c
new file mode 100644
index 0000000..1048d29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-2.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+
+DEF_OP_WVV(wsub, 4, _Float16, float, -)
+DEF_OP_WVV(wsub, 8, _Float16, float, -)
+DEF_OP_WVV(wsub, 16, _Float16, float, -)
+DEF_OP_WVV(wsub, 32, _Float16, float, -)
+DEF_OP_WVV(wsub, 64, _Float16, float, -)
+DEF_OP_WVV(wsub, 128, _Float16, float, -)
+DEF_OP_WVV(wsub, 256, _Float16, float, -)
+DEF_OP_WVV(wsub, 512, _Float16, float, -)
+DEF_OP_WVV(wsub, 1024, _Float16, float, -)
+
+DEF_OP_WVV(wsub, 4, float, double, -)
+DEF_OP_WVV(wsub, 8, float, double, -)
+DEF_OP_WVV(wsub, 16, float, double, -)
+DEF_OP_WVV(wsub, 32, float, double, -)
+DEF_OP_WVV(wsub, 64, float, double, -)
+DEF_OP_WVV(wsub, 128, float, double, -)
+DEF_OP_WVV(wsub, 256, float, double, -)
+DEF_OP_WVV(wsub, 512, float, double, -)
+
+/* { dg-final { scan-assembler-times {vfwsub\.vv} 17 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-3.c
new file mode 100644
index 0000000..ac4bfe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-3.c
@@ -0,0 +1,80 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Integer '-' via DEF_OP_WWV (defined in def.h, not visible here), signed and unsigned; expects vwsub.wv and vwsubu.wv.  */
+#include "def.h"
+
+DEF_OP_WWV(wsub, 4, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 8, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 16, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 32, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 64, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 128, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 256, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 512, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 1024, int8_t, int16_t, -)
+DEF_OP_WWV(wsub, 2048, int8_t, int16_t, -)
+
+DEF_OP_WWV(wsub, 4, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 8, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 16, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 32, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 64, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 128, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 256, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 512, int16_t, int32_t, -)
+DEF_OP_WWV(wsub, 1024, int16_t, int32_t, -)
+
+DEF_OP_WWV(wsub, 4, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 8, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 16, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 32, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 64, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 128, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 256, int32_t, int64_t, -)
+DEF_OP_WWV(wsub, 512, int32_t, int64_t, -)
+
+DEF_OP_WWV(wsub, 4, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 8, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 16, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 32, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 64, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 128, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 256, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 512, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 1024, uint8_t, uint16_t, -)
+DEF_OP_WWV(wsub, 2048, uint8_t, uint16_t, -)
+
+DEF_OP_WWV(wsub, 4, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 8, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 16, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 32, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 64, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 128, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 256, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 512, uint16_t, uint32_t, -)
+DEF_OP_WWV(wsub, 1024, uint16_t, uint32_t, -)
+
+DEF_OP_WWV(wsub, 4, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 8, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 16, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 32, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 64, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 128, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 256, uint32_t, uint64_t, -)
+DEF_OP_WWV(wsub, 512, uint32_t, uint64_t, -)
+/* 10 + 9 + 8 = 27 signed and 27 unsigned instantiations.  The "N,N" dump checks skip "8,8" -- NOTE(review): confirm intentional.  */
+/* { dg-final { scan-assembler-times {vwsub\.wv} 27 } } */
+/* { dg-final { scan-assembler-times {vwsubu\.wv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-4.c
new file mode 100644
index 0000000..619c0c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/wsub-4.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Floating-point '-' via DEF_OP_WWV (defined in def.h, not visible here); expects vfwsub.wv.  */
+#include "def.h"
+
+DEF_OP_WWV(wsub, 4, _Float16, float, -)
+DEF_OP_WWV(wsub, 8, _Float16, float, -)
+DEF_OP_WWV(wsub, 16, _Float16, float, -)
+DEF_OP_WWV(wsub, 32, _Float16, float, -)
+DEF_OP_WWV(wsub, 64, _Float16, float, -)
+DEF_OP_WWV(wsub, 128, _Float16, float, -)
+DEF_OP_WWV(wsub, 256, _Float16, float, -)
+DEF_OP_WWV(wsub, 512, _Float16, float, -)
+DEF_OP_WWV(wsub, 1024, _Float16, float, -)
+
+DEF_OP_WWV(wsub, 4, float, double, -)
+DEF_OP_WWV(wsub, 8, float, double, -)
+DEF_OP_WWV(wsub, 16, float, double, -)
+DEF_OP_WWV(wsub, 32, float, double, -)
+DEF_OP_WWV(wsub, 64, float, double, -)
+DEF_OP_WWV(wsub, 128, float, double, -)
+DEF_OP_WWV(wsub, 256, float, double, -)
+DEF_OP_WWV(wsub, 512, float, double, -)
+/* 9 + 8 = 17 instantiations above.  The "N,N" dump checks skip "8,8" -- NOTE(review): confirm that is intentional.  */
+/* { dg-final { scan-assembler-times {vfwsub\.wv} 17 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */