aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/RISCV
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r--llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll8
-rw-r--r--llvm/test/CodeGen/RISCV/make-compressible.mir499
-rw-r--r--llvm/test/CodeGen/RISCV/pr51206.ll12
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll30
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll1701
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll1697
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll1745
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll1740
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll32
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll2015
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll2014
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll2067
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll2065
-rw-r--r--llvm/test/CodeGen/RISCV/select.ll252
-rw-r--r--llvm/test/CodeGen/RISCV/sext-zext-trunc.ll61
-rw-r--r--llvm/test/CodeGen/RISCV/split-offsets.ll4
-rw-r--r--llvm/test/CodeGen/RISCV/srem-vector-lkk.ll8
-rw-r--r--llvm/test/CodeGen/RISCV/urem-vector-lkk.ll8
18 files changed, 15767 insertions, 191 deletions
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 91e7399..3c2e846 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
-; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I %s
+; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -code-model=medium < %s \
-; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I-MEDIUM %s
+; RUN: | FileCheck -check-prefix=RV32I-MEDIUM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
-; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I %s
+; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=medium < %s \
-; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I-MEDIUM %s
+; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s
; We can often fold an ADDI into the offset of load/store instructions:
; (load (addi base, off1), off2) -> (load base, off1+off2)
diff --git a/llvm/test/CodeGen/RISCV/make-compressible.mir b/llvm/test/CodeGen/RISCV/make-compressible.mir
index 2105a13..03da38a 100644
--- a/llvm/test/CodeGen/RISCV/make-compressible.mir
+++ b/llvm/test/CodeGen/RISCV/make-compressible.mir
@@ -1,8 +1,14 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+f,+d -simplify-mir \
-# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV32 %s
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32C %s
# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+f,+d -simplify-mir \
-# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV64 %s
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV64,RV64C %s
+# RUN: llc -o - %s -mtriple=riscv32 -mattr=+d,+zcf -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32ZCF %s
+# RUN: llc -o - %s -mtriple=riscv32 -mattr=+d,+zca -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32ZCA %s
+# RUN: llc -o - %s -mtriple=riscv64 -mattr=+d,+zca -simplify-mir \
+# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV64,RV64ZCA %s
--- |
define void @store_common_value(ptr %a, ptr %b, ptr %c) #0 {
@@ -288,7 +294,7 @@
ret { double, double } %3
}
- attributes #0 = { minsize "target-features"="+c,+f,+d" }
+ attributes #0 = { minsize }
...
---
@@ -306,6 +312,7 @@ body: |
; RV32-NEXT: SW $x13, killed renamable $x11, 0 :: (store (s32) into %ir.b)
; RV32-NEXT: SW $x13, killed renamable $x12, 0 :: (store (s32) into %ir.c)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_value
; RV64: liveins: $x10, $x11, $x12
; RV64-NEXT: {{ $}}
@@ -327,14 +334,15 @@ body: |
bb.0.entry:
liveins: $x10, $x11, $x12, $f16_f
- ; RV32-LABEL: name: store_common_value_float
- ; RV32: liveins: $x10, $x11, $x12, $f16_f
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f
- ; RV32-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
- ; RV32-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
- ; RV32-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
- ; RV32-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_common_value_float
+ ; RV32C: liveins: $x10, $x11, $x12, $f16_f
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f
+ ; RV32C-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; RV32C-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ ; RV32C-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ ; RV32C-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_value_float
; RV64: liveins: $x10, $x11, $x12, $f16_f
; RV64-NEXT: {{ $}}
@@ -342,6 +350,23 @@ body: |
; RV64-NEXT: FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
; RV64-NEXT: FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
; RV64-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_common_value_float
+ ; RV32ZCF: liveins: $x10, $x11, $x12, $f16_f
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f
+ ; RV32ZCF-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; RV32ZCF-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ ; RV32ZCF-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_common_value_float
+ ; RV32ZCA: liveins: $x10, $x11, $x12, $f16_f
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
+ ; RV32ZCA-NEXT: FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
+ ; RV32ZCA-NEXT: FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
+ ; RV32ZCA-NEXT: PseudoRET
FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b)
FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c)
@@ -355,22 +380,47 @@ body: |
bb.0.entry:
liveins: $x10, $x11, $x12, $f16_d
- ; RV32-LABEL: name: store_common_value_double
- ; RV32: liveins: $x10, $x11, $x12, $f16_d
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d
- ; RV32-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
- ; RV32-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
- ; RV32-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
- ; RV32-NEXT: PseudoRET
- ; RV64-LABEL: name: store_common_value_double
- ; RV64: liveins: $x10, $x11, $x12, $f16_d
- ; RV64-NEXT: {{ $}}
- ; RV64-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d
- ; RV64-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
- ; RV64-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
- ; RV64-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
- ; RV64-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_common_value_double
+ ; RV32C: liveins: $x10, $x11, $x12, $f16_d
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d
+ ; RV32C-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
+ ; RV32C-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
+ ; RV32C-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
+ ; RV32C-NEXT: PseudoRET
+ ;
+ ; RV64C-LABEL: name: store_common_value_double
+ ; RV64C: liveins: $x10, $x11, $x12, $f16_d
+ ; RV64C-NEXT: {{ $}}
+ ; RV64C-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d
+ ; RV64C-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
+ ; RV64C-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
+ ; RV64C-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
+ ; RV64C-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_common_value_double
+ ; RV32ZCF: liveins: $x10, $x11, $x12, $f16_d
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
+ ; RV32ZCF-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
+ ; RV32ZCF-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_common_value_double
+ ; RV32ZCA: liveins: $x10, $x11, $x12, $f16_d
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
+ ; RV32ZCA-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
+ ; RV32ZCA-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
+ ; RV32ZCA-NEXT: PseudoRET
+ ;
+ ; RV64ZCA-LABEL: name: store_common_value_double
+ ; RV64ZCA: liveins: $x10, $x11, $x12, $f16_d
+ ; RV64ZCA-NEXT: {{ $}}
+ ; RV64ZCA-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
+ ; RV64ZCA-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
+ ; RV64ZCA-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
+ ; RV64ZCA-NEXT: PseudoRET
FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b)
FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c)
@@ -395,6 +445,7 @@ body: |
; RV32-NEXT: renamable $x10 = ADDI $x0, 5
; RV32-NEXT: SW killed renamable $x10, killed $x11, 0 :: (volatile store (s32) into %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -432,6 +483,7 @@ body: |
; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p)
; RV32-NEXT: SW killed $x11, $x11, 0 :: (volatile store (s32) into %ir.q)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr_self
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -457,14 +509,15 @@ body: |
bb.0.entry:
liveins: $x16, $f10_f, $f11_f, $f12_f
- ; RV32-LABEL: name: store_common_ptr_float
- ; RV32: liveins: $x16, $f10_f, $f11_f, $f12_f
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x10 = ADDI $x16, 0
- ; RV32-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p)
- ; RV32-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p)
- ; RV32-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p)
- ; RV32-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_common_ptr_float
+ ; RV32C: liveins: $x16, $f10_f, $f11_f, $f12_f
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x10 = ADDI $x16, 0
+ ; RV32C-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32C-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32C-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32C-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr_float
; RV64: liveins: $x16, $f10_f, $f11_f, $f12_f
; RV64-NEXT: {{ $}}
@@ -472,6 +525,23 @@ body: |
; RV64-NEXT: FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
; RV64-NEXT: FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
; RV64-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_common_ptr_float
+ ; RV32ZCF: liveins: $x16, $f10_f, $f11_f, $f12_f
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: $x10 = ADDI $x16, 0
+ ; RV32ZCF-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCF-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCF-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_common_ptr_float
+ ; RV32ZCA: liveins: $x16, $f10_f, $f11_f, $f12_f
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCA-NEXT: FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCA-NEXT: FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
+ ; RV32ZCA-NEXT: PseudoRET
FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p)
FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
@@ -485,22 +555,47 @@ body: |
bb.0.entry:
liveins: $x16, $f10_d, $f11_d, $f12_d
- ; RV32-LABEL: name: store_common_ptr_double
- ; RV32: liveins: $x16, $f10_d, $f11_d, $f12_d
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x10 = ADDI $x16, 0
- ; RV32-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV32-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV32-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV32-NEXT: PseudoRET
- ; RV64-LABEL: name: store_common_ptr_double
- ; RV64: liveins: $x16, $f10_d, $f11_d, $f12_d
- ; RV64-NEXT: {{ $}}
- ; RV64-NEXT: $x10 = ADDI $x16, 0
- ; RV64-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV64-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV64-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p)
- ; RV64-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_common_ptr_double
+ ; RV32C: liveins: $x16, $f10_d, $f11_d, $f12_d
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x10 = ADDI $x16, 0
+ ; RV32C-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32C-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32C-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32C-NEXT: PseudoRET
+ ;
+ ; RV64C-LABEL: name: store_common_ptr_double
+ ; RV64C: liveins: $x16, $f10_d, $f11_d, $f12_d
+ ; RV64C-NEXT: {{ $}}
+ ; RV64C-NEXT: $x10 = ADDI $x16, 0
+ ; RV64C-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64C-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64C-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64C-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_common_ptr_double
+ ; RV32ZCF: liveins: $x16, $f10_d, $f11_d, $f12_d
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCF-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCF-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_common_ptr_double
+ ; RV32ZCA: liveins: $x16, $f10_d, $f11_d, $f12_d
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCA-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCA-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV32ZCA-NEXT: PseudoRET
+ ;
+ ; RV64ZCA-LABEL: name: store_common_ptr_double
+ ; RV64ZCA: liveins: $x16, $f10_d, $f11_d, $f12_d
+ ; RV64ZCA-NEXT: {{ $}}
+ ; RV64ZCA-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64ZCA-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64ZCA-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
+ ; RV64ZCA-NEXT: PseudoRET
FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
@@ -522,6 +617,7 @@ body: |
; RV32-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p)
; RV32-NEXT: dead renamable $x10 = LW killed $x11, 0 :: (volatile load (s32) from %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: load_common_ptr
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -543,14 +639,15 @@ body: |
bb.0.entry:
liveins: $x16
- ; RV32-LABEL: name: load_common_ptr_float
- ; RV32: liveins: $x16
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x10 = ADDI $x16, 0
- ; RV32-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g)
- ; RV32-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1)
- ; RV32-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2)
- ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ; RV32C-LABEL: name: load_common_ptr_float
+ ; RV32C: liveins: $x16
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x10 = ADDI $x16, 0
+ ; RV32C-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g)
+ ; RV32C-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1)
+ ; RV32C-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2)
+ ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
; RV64-LABEL: name: load_common_ptr_float
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -558,6 +655,23 @@ body: |
; RV64-NEXT: renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1)
; RV64-NEXT: renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2)
; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
+ ; RV32ZCF-LABEL: name: load_common_ptr_float
+ ; RV32ZCF: liveins: $x16
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: $x10 = ADDI $x16, 0
+ ; RV32ZCF-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g)
+ ; RV32ZCF-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1)
+ ; RV32ZCF-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2)
+ ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
+ ; RV32ZCA-LABEL: name: load_common_ptr_float
+ ; RV32ZCA: liveins: $x16
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g)
+ ; RV32ZCA-NEXT: renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1)
+ ; RV32ZCA-NEXT: renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2)
+ ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g)
renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1)
renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2)
@@ -571,22 +685,47 @@ body: |
bb.0.entry:
liveins: $x16
- ; RV32-LABEL: name: load_common_ptr_double
- ; RV32: liveins: $x16
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x10 = ADDI $x16, 0
- ; RV32-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g)
- ; RV32-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1)
- ; RV32-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2)
- ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
- ; RV64-LABEL: name: load_common_ptr_double
- ; RV64: liveins: $x16
- ; RV64-NEXT: {{ $}}
- ; RV64-NEXT: $x10 = ADDI $x16, 0
- ; RV64-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g)
- ; RV64-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1)
- ; RV64-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2)
- ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ; RV32C-LABEL: name: load_common_ptr_double
+ ; RV32C: liveins: $x16
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x10 = ADDI $x16, 0
+ ; RV32C-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g)
+ ; RV32C-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1)
+ ; RV32C-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2)
+ ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV64C-LABEL: name: load_common_ptr_double
+ ; RV64C: liveins: $x16
+ ; RV64C-NEXT: {{ $}}
+ ; RV64C-NEXT: $x10 = ADDI $x16, 0
+ ; RV64C-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g)
+ ; RV64C-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1)
+ ; RV64C-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2)
+ ; RV64C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV32ZCF-LABEL: name: load_common_ptr_double
+ ; RV32ZCF: liveins: $x16
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g)
+ ; RV32ZCF-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1)
+ ; RV32ZCF-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2)
+ ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV32ZCA-LABEL: name: load_common_ptr_double
+ ; RV32ZCA: liveins: $x16
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g)
+ ; RV32ZCA-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1)
+ ; RV32ZCA-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2)
+ ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV64ZCA-LABEL: name: load_common_ptr_double
+ ; RV64ZCA: liveins: $x16
+ ; RV64ZCA-NEXT: {{ $}}
+ ; RV64ZCA-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g)
+ ; RV64ZCA-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1)
+ ; RV64ZCA-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2)
+ ; RV64ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g)
renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1)
renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2)
@@ -613,6 +752,7 @@ body: |
; RV32-NEXT: renamable $x11 = ADDI $x0, 7
; RV32-NEXT: SW killed renamable $x11, killed $x12, 28 :: (volatile store (s32) into %ir.3)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_large_offset
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -644,15 +784,16 @@ body: |
bb.0.entry:
liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
- ; RV32-LABEL: name: store_large_offset_float
- ; RV32: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x11 = ADDI $x10, 384
- ; RV32-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0)
- ; RV32-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1)
- ; RV32-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2)
- ; RV32-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3)
- ; RV32-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_large_offset_float
+ ; RV32C: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x11 = ADDI $x10, 384
+ ; RV32C-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0)
+ ; RV32C-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1)
+ ; RV32C-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2)
+ ; RV32C-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3)
+ ; RV32C-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_large_offset_float
; RV64: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
; RV64-NEXT: {{ $}}
@@ -661,6 +802,25 @@ body: |
; RV64-NEXT: FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2)
; RV64-NEXT: FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3)
; RV64-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_large_offset_float
+ ; RV32ZCF: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: $x11 = ADDI $x10, 384
+ ; RV32ZCF-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0)
+ ; RV32ZCF-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1)
+ ; RV32ZCF-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2)
+ ; RV32ZCF-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_large_offset_float
+ ; RV32ZCA: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
+ ; RV32ZCA-NEXT: FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1)
+ ; RV32ZCA-NEXT: FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2)
+ ; RV32ZCA-NEXT: FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3)
+ ; RV32ZCA-NEXT: PseudoRET
FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1)
FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2)
@@ -675,24 +835,52 @@ body: |
bb.0.entry:
liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
- ; RV32-LABEL: name: store_large_offset_double
- ; RV32: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x11 = ADDI $x10, 768
- ; RV32-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0)
- ; RV32-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1)
- ; RV32-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2)
- ; RV32-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3)
- ; RV32-NEXT: PseudoRET
- ; RV64-LABEL: name: store_large_offset_double
- ; RV64: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
- ; RV64-NEXT: {{ $}}
- ; RV64-NEXT: $x11 = ADDI $x10, 768
- ; RV64-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0)
- ; RV64-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1)
- ; RV64-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2)
- ; RV64-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3)
- ; RV64-NEXT: PseudoRET
+ ; RV32C-LABEL: name: store_large_offset_double
+ ; RV32C: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x11 = ADDI $x10, 768
+ ; RV32C-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0)
+ ; RV32C-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1)
+ ; RV32C-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2)
+ ; RV32C-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3)
+ ; RV32C-NEXT: PseudoRET
+ ;
+ ; RV64C-LABEL: name: store_large_offset_double
+ ; RV64C: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
+ ; RV64C-NEXT: {{ $}}
+ ; RV64C-NEXT: $x11 = ADDI $x10, 768
+ ; RV64C-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0)
+ ; RV64C-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1)
+ ; RV64C-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2)
+ ; RV64C-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3)
+ ; RV64C-NEXT: PseudoRET
+ ;
+ ; RV32ZCF-LABEL: name: store_large_offset_double
+ ; RV32ZCF: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
+ ; RV32ZCF-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1)
+ ; RV32ZCF-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2)
+ ; RV32ZCF-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3)
+ ; RV32ZCF-NEXT: PseudoRET
+ ;
+ ; RV32ZCA-LABEL: name: store_large_offset_double
+ ; RV32ZCA: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
+ ; RV32ZCA-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1)
+ ; RV32ZCA-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2)
+ ; RV32ZCA-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3)
+ ; RV32ZCA-NEXT: PseudoRET
+ ;
+ ; RV64ZCA-LABEL: name: store_large_offset_double
+ ; RV64ZCA: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d
+ ; RV64ZCA-NEXT: {{ $}}
+ ; RV64ZCA-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
+ ; RV64ZCA-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1)
+ ; RV64ZCA-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2)
+ ; RV64ZCA-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3)
+ ; RV64ZCA-NEXT: PseudoRET
FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1)
FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2)
@@ -716,6 +904,7 @@ body: |
; RV32-NEXT: dead renamable $x11 = LW $x12, 24 :: (volatile load (s32) from %ir.2)
; RV32-NEXT: dead renamable $x10 = LW killed $x12, 28 :: (volatile load (s32) from %ir.3)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: load_large_offset
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -739,14 +928,15 @@ body: |
bb.0.entry:
liveins: $x10
- ; RV32-LABEL: name: load_large_offset_float
- ; RV32: liveins: $x10
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x11 = ADDI $x10, 384
- ; RV32-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx)
- ; RV32-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1)
- ; RV32-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2)
- ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ; RV32C-LABEL: name: load_large_offset_float
+ ; RV32C: liveins: $x10
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x11 = ADDI $x10, 384
+ ; RV32C-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx)
+ ; RV32C-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1)
+ ; RV32C-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2)
+ ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
; RV64-LABEL: name: load_large_offset_float
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -754,6 +944,23 @@ body: |
; RV64-NEXT: renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
; RV64-NEXT: renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2)
; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
+ ; RV32ZCF-LABEL: name: load_large_offset_float
+ ; RV32ZCF: liveins: $x10
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: $x11 = ADDI $x10, 384
+ ; RV32ZCF-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx)
+ ; RV32ZCF-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1)
+ ; RV32ZCF-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2)
+ ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
+ ;
+ ; RV32ZCA-LABEL: name: load_large_offset_float
+ ; RV32ZCA: liveins: $x10
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
+ ; RV32ZCA-NEXT: renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
+ ; RV32ZCA-NEXT: renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2)
+ ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f
renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2)
@@ -767,22 +974,47 @@ body: |
bb.0.entry:
liveins: $x10
- ; RV32-LABEL: name: load_large_offset_double
- ; RV32: liveins: $x10
- ; RV32-NEXT: {{ $}}
- ; RV32-NEXT: $x11 = ADDI $x10, 768
- ; RV32-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx)
- ; RV32-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1)
- ; RV32-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2)
- ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
- ; RV64-LABEL: name: load_large_offset_double
- ; RV64: liveins: $x10
- ; RV64-NEXT: {{ $}}
- ; RV64-NEXT: $x11 = ADDI $x10, 768
- ; RV64-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx)
- ; RV64-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1)
- ; RV64-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2)
- ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ; RV32C-LABEL: name: load_large_offset_double
+ ; RV32C: liveins: $x10
+ ; RV32C-NEXT: {{ $}}
+ ; RV32C-NEXT: $x11 = ADDI $x10, 768
+ ; RV32C-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx)
+ ; RV32C-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1)
+ ; RV32C-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2)
+ ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV64C-LABEL: name: load_large_offset_double
+ ; RV64C: liveins: $x10
+ ; RV64C-NEXT: {{ $}}
+ ; RV64C-NEXT: $x11 = ADDI $x10, 768
+ ; RV64C-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx)
+ ; RV64C-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1)
+ ; RV64C-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2)
+ ; RV64C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV32ZCF-LABEL: name: load_large_offset_double
+ ; RV32ZCF: liveins: $x10
+ ; RV32ZCF-NEXT: {{ $}}
+ ; RV32ZCF-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
+ ; RV32ZCF-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
+ ; RV32ZCF-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2)
+ ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV32ZCA-LABEL: name: load_large_offset_double
+ ; RV32ZCA: liveins: $x10
+ ; RV32ZCA-NEXT: {{ $}}
+ ; RV32ZCA-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
+ ; RV32ZCA-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
+ ; RV32ZCA-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2)
+ ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
+ ;
+ ; RV64ZCA-LABEL: name: load_large_offset_double
+ ; RV64ZCA: liveins: $x10
+ ; RV64ZCA-NEXT: {{ $}}
+ ; RV64ZCA-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
+ ; RV64ZCA-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
+ ; RV64ZCA-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2)
+ ; RV64ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d
renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2)
@@ -801,6 +1033,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_value_no_opt
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -822,6 +1055,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: FSW killed renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_value_float_no_opt
; RV64: liveins: $x10, $f16_f
; RV64-NEXT: {{ $}}
@@ -843,6 +1077,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: FSD killed renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_value_double_no_opt
; RV64: liveins: $x10, $f16_d
; RV64-NEXT: {{ $}}
@@ -865,6 +1100,7 @@ body: |
; RV32-NEXT: renamable $x10 = ADDI $x0, 1
; RV32-NEXT: SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr_no_opt
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -888,6 +1124,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: FSW killed renamable $f10_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr_float_no_opt
; RV64: liveins: $x16, $f10_f
; RV64-NEXT: {{ $}}
@@ -909,6 +1146,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: FSD killed renamable $f10_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_common_ptr_double_no_opt
; RV64: liveins: $x16, $f10_d
; RV64-NEXT: {{ $}}
@@ -930,6 +1168,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: load_common_ptr_no_opt
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -951,6 +1190,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: renamable $f10_f = FLW killed renamable $x16, 0 :: (load (s32) from %ir.g)
; RV32-NEXT: PseudoRET implicit $f10_f
+ ;
; RV64-LABEL: name: load_common_ptr_float_no_opt
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -972,6 +1212,7 @@ body: |
; RV32-NEXT: {{ $}}
; RV32-NEXT: renamable $f10_d = FLD killed renamable $x16, 0 :: (load (s64) from %ir.g)
; RV32-NEXT: PseudoRET implicit $f10_d
+ ;
; RV64-LABEL: name: load_common_ptr_double_no_opt
; RV64: liveins: $x16
; RV64-NEXT: {{ $}}
@@ -996,6 +1237,7 @@ body: |
; RV32-NEXT: renamable $x11 = ADDI $x0, 3
; RV32-NEXT: SW killed renamable $x11, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_large_offset_no_opt
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -1024,6 +1266,7 @@ body: |
; RV32-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0)
; RV32-NEXT: FSW killed renamable $f11_f, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_large_offset_float_no_opt
; RV64: liveins: $x10, $f10_f, $f11_f
; RV64-NEXT: {{ $}}
@@ -1048,6 +1291,7 @@ body: |
; RV32-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
; RV32-NEXT: FSD killed renamable $f11_d, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: store_large_offset_double_no_opt
; RV64: liveins: $x10, $f10_d, $f11_d
; RV64-NEXT: {{ $}}
@@ -1072,6 +1316,7 @@ body: |
; RV32-NEXT: dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0)
; RV32-NEXT: dead renamable $x10 = LW killed renamable $x10, 404 :: (volatile load (s32) from %ir.1)
; RV32-NEXT: PseudoRET
+ ;
; RV64-LABEL: name: load_large_offset_no_opt
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -1096,6 +1341,7 @@ body: |
; RV32-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx)
; RV32-NEXT: renamable $f11_f = FLW killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1)
; RV32-NEXT: PseudoRET implicit $f10_f, implicit $f11_f
+ ;
; RV64-LABEL: name: load_large_offset_float_no_opt
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
@@ -1120,6 +1366,7 @@ body: |
; RV32-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
; RV32-NEXT: renamable $f11_d = FLD killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
; RV32-NEXT: PseudoRET implicit $f10_d, implicit $f11_d
+ ;
; RV64-LABEL: name: load_large_offset_double_no_opt
; RV64: liveins: $x10
; RV64-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/RISCV/pr51206.ll b/llvm/test/CodeGen/RISCV/pr51206.ll
index f54031a..8aa145f 100644
--- a/llvm/test/CodeGen/RISCV/pr51206.ll
+++ b/llvm/test/CodeGen/RISCV/pr51206.ll
@@ -27,16 +27,12 @@ define signext i32 @wobble() nounwind {
; CHECK-NEXT: lui a2, %hi(global.3)
; CHECK-NEXT: li a3, 5
; CHECK-NEXT: sw a1, %lo(global.3)(a2)
-; CHECK-NEXT: bltu a0, a3, .LBB0_2
-; CHECK-NEXT: # %bb.1: # %bb10
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: call quux
-; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .LBB0_2: # %bb12
+; CHECK-NEXT: bgeu a0, a3, .LBB0_2
+; CHECK-NEXT: # %bb.1: # %bb12
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: # %bb10
+; CHECK-NEXT: tail quux
bb:
%tmp = load i8, ptr @global, align 1
%tmp1 = zext i8 %tmp to i32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 1724b48..a09ab3e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
-; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
-; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
@@ -15086,31 +15086,27 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
ret <32 x i64> %x
}
-; FIXME: This is a miscompile triggered by the mgather ->
-; riscv.masked.strided.load combine. In order for it to trigger we need either a
-; strided gather that RISCVGatherScatterLowering doesn't pick up, or a new
-; strided gather generated by the widening sew combine.
define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
; RV32V: # %bb.0:
-; RV32V-NEXT: addi a0, a0, -128
-; RV32V-NEXT: li a1, -128
+; RV32V-NEXT: addi a0, a0, 136
+; RV32V-NEXT: li a1, -136
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
; RV64V: # %bb.0:
-; RV64V-NEXT: addi a0, a0, -128
-; RV64V-NEXT: li a1, -128
+; RV64V-NEXT: addi a0, a0, 136
+; RV64V-NEXT: li a1, -136
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lui a1, 16392
-; RV32ZVE32F-NEXT: addi a1, a1, 1152
+; RV32ZVE32F-NEXT: lui a1, 16393
+; RV32ZVE32F-NEXT: addi a1, a1, -888
; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
@@ -15118,8 +15114,8 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
;
; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: addi a1, a0, 128
-; RV64ZVE32F-NEXT: lw a2, 132(a0)
+; RV64ZVE32F-NEXT: addi a1, a0, 136
+; RV64ZVE32F-NEXT: lw a2, 140(a0)
; RV64ZVE32F-NEXT: lw a3, 0(a0)
; RV64ZVE32F-NEXT: lw a0, 4(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -15128,7 +15124,7 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
- %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 32, i64 33, i64 0, i64 1>
+ %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1>
%x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> shufflevector(<4 x i1> insertelement(<4 x i1> poison, i1 true, i32 0), <4 x i1> poison, <4 x i32> zeroinitializer), <4 x i32> poison)
ret <4 x i32> %x
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
new file mode 100644
index 0000000..6c5dd04
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
@@ -0,0 +1,1701 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vsadd_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v9
+; CHECK-NEXT: vsra.vi v9, v9, 1
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsadd_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsadd_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vsadd_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsadd_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsadd_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsadd_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @vsadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsadd_vi_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsadd_vi_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsadd_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsadd_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsadd_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsadd_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsadd_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsadd_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsadd_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsadd_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsadd_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsadd_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsadd_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB108_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB108_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB109_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v24
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v16, v16, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB109_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB109_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vi v8, v8, -1
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vi v16, v16, -1
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; FIXME: We don't match vsadd.vi on RV32.
+
+define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsadd_vx_v32i64_evl12:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v32i64_evl12:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsadd_vx_v32i64_evl27:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_v32i64_evl27:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ ret <32 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
new file mode 100644
index 0000000..6227f8a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
@@ -0,0 +1,1697 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vsaddu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsaddu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsaddu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsaddu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsaddu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vsaddu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vsaddu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsaddu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsaddu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsaddu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsaddu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @vsaddu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsaddu_vi_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vsaddu_vi_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsaddu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsaddu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsaddu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsaddu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsaddu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsaddu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsaddu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsaddu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsaddu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsaddu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsaddu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsaddu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsaddu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsaddu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsaddu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsaddu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsaddu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsaddu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsaddu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsaddu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsaddu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsaddu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsaddu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsaddu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB108_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB108_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB109_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v24
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v16, v16, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB109_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB109_2:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vi v8, v8, -1
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vi v16, v16, -1
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; FIXME: We don't match vsaddu.vi on RV32.
+
+define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsaddu_vx_v32i64_evl12:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64_evl12:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vsaddu_vx_v32i64_evl27:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_v32i64_evl27:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ ret <32 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
new file mode 100644
index 0000000..6360cf4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
@@ -0,0 +1,1745 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vssub_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vadd.vv v9, v9, v9
+; CHECK-NEXT: vsra.vi v9, v9, 1
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vssub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vssub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vssub_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssub_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vssub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vssub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @vssub_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a2
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssub_vi_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssub_vi_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vssub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vssub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vssub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vssub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vssub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vssub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vssub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vssub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vssub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vssub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vssub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vssub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB108_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB108_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a2, v0.t
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssub.vx v16, v16, a2, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB109_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v24
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v16, v16, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB109_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB109_2:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a2
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v16, v16, a2
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; FIXME: We don't match vssub.vi on RV32.
+
+define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssub_vx_v32i64_evl12:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v32i64_evl12:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: li a0, -1
+; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssub.vx v16, v16, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssub_vx_v32i64_evl27:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssub.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_v32i64_evl27:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: li a0, -1
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssub.vx v16, v16, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ ret <32 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
new file mode 100644
index 0000000..6ea9758
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
@@ -0,0 +1,1740 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
+
+define <8 x i7> @vssubu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 127
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i7> %v
+}
+
+declare <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vssubu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vssubu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vssubu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vssubu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32)
+
+define <5 x i8> @vssubu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+define <5 x i8> @vssubu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v5i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+ %head = insertelement <5 x i1> poison, i1 true, i32 0
+ %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ ret <5 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vssubu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vssubu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vssubu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vssubu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32)
+
+define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v258i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: addi a0, a1, -128
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @vssubu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v258i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: bltu a0, a2, .LBB33_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a2
+; CHECK-NEXT: addi a1, a0, -128
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %head = insertelement <256 x i1> poison, i1 true, i32 0
+ %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ ret <256 x i8> %v
+}
+
+; Test splitting when the %evl is a known constant.
+
+define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssubu_vi_v258i8_evl129:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ ret <256 x i8> %v
+}
+
+; FIXME: The upper half is doing nothing.
+
+define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
+; CHECK-LABEL: vssubu_vi_v258i8_evl128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 128
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v24, (a0)
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ ret <256 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vssubu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vssubu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vssubu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vssubu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vssubu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vssubu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vssubu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vssubu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vssubu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vssubu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vssubu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vssubu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vssubu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vssubu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vssubu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vssubu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vssubu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vssubu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vssubu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vssubu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vssubu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vssubu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vssubu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vssubu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+; Test that split-legalization works as expected.
+
+declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32)
+
+define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_v32i64:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB108_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v32i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB108_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB108_2:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a2, v0.t
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vi_v32i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB109_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v24
+; RV32-NEXT: addi a1, a0, -16
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v16, v16, v24
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vi_v32i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bltu a0, a2, .LBB109_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a1, 16
+; RV64-NEXT: .LBB109_2:
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a2
+; RV64-NEXT: addi a1, a0, -16
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v16, v16, a2
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %head = insertelement <32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ ret <32 x i64> %v
+}
+
+; FIXME: We don't match vssubu.vi on RV32.
+
+define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssubu_vx_v32i64_evl12:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v32i64_evl12:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: li a0, -1
+; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
+; RV32-LABEL: vssubu_vx_v32i64_evl27:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v1, v0, 2
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t
+; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV32-NEXT: vmv1r.v v0, v1
+; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_v32i64_evl27:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v24, v0, 2
+; RV64-NEXT: li a0, -1
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
+ %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ ret <32 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
index 574c265..a084b53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -385,12 +385,12 @@ define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind {
define <2 x i32> @vwsubu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i32_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
%b = load <2 x i8>, ptr %y
@@ -899,12 +899,12 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i32_of_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
%b = load <2 x i8>, ptr %y
@@ -917,12 +917,12 @@ define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) {
define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i64_of_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vzext.vf4 v10, v8
-; CHECK-NEXT: vzext.vf4 v11, v9
-; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsext.vf4 v8, v10
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
%b = load <2 x i8>, ptr %y
@@ -935,12 +935,12 @@ define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) {
define <2 x i64> @vwsubu_v2i64_of_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwsubu_v2i64_of_v2i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v9, (a1)
-; CHECK-NEXT: vzext.vf2 v10, v8
-; CHECK-NEXT: vzext.vf2 v11, v9
-; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: vwsubu.vv v10, v8, v9
+; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsext.vf2 v8, v10
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %x
%b = load <2 x i16>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
new file mode 100644
index 0000000..caaeae5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
@@ -0,0 +1,2015 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vsadd_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsadd_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsadd_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vsadd_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsadd_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsadd_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsadd_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsadd_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsadd_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsadd_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsadd_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsadd_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsadd_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsadd_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vsadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub a2, a1, a0
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a0, .LBB50_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vsadd_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv128i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1
+; CHECK-NEXT: bltu a0, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsadd_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsadd_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsadd_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsadd_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsadd_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsadd_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsadd_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsadd_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsadd_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsadd_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsadd_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsadd_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsadd_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsadd_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsadd_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsadd_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsadd_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsadd_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsadd_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vsadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB118_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vsadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v16, v16, -1
+; CHECK-NEXT: bltu a0, a1, .LBB119_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB119_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsadd_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsadd_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsadd_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsadd_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsadd_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsadd_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsadd_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsadd_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsadd_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
new file mode 100644
index 0000000..c0779e5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
@@ -0,0 +1,2014 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vsaddu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 127
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vand.vx v9, v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vminu.vx v8, v8, a2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsaddu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsaddu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsaddu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsaddu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vsaddu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vsaddu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsaddu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsaddu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsaddu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsaddu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsaddu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsaddu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsaddu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsaddu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsaddu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsaddu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vsaddu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub a2, a1, a0
+; CHECK-NEXT: sltu a3, a1, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a1, a0, .LBB50_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vsaddu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv128i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1
+; CHECK-NEXT: bltu a0, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsaddu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsaddu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsaddu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsaddu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsaddu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsaddu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsaddu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsaddu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsaddu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsaddu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsaddu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsaddu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsaddu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsaddu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsaddu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsaddu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsaddu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsaddu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsaddu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsaddu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsaddu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsaddu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vsaddu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t
+; CHECK-NEXT: bltu a0, a1, .LBB118_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vsaddu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a3, a2
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v16, v16, -1
+; CHECK-NEXT: bltu a0, a1, .LBB119_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB119_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsaddu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsaddu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsaddu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsaddu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsaddu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsaddu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsaddu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsaddu_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsaddu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsaddu_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vsaddu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsaddu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsaddu_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vsaddu.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
new file mode 100644
index 0000000..2d51a2e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -0,0 +1,2067 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vssub_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vsra.vi v8, v8, 1
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 63
+; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 192
+; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vssub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssub_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vssub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssub_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vssub_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssub_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vssub_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssub_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vssub_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssub_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vssub_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssub_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vssub_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssub_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vssub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssub_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vssub_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub a0, a1, a2
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB50_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vssub_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv128i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a2
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a2
+; CHECK-NEXT: bltu a0, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vssub_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssub_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vssub_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssub_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vssub_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssub_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vssub_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssub_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vssub_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssub_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vssub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssub_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vssub_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssub_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vssub_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssub_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vssub_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssub_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vssub_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssub_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vssub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssub_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vssub_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub a1, a0, a2
+; CHECK-NEXT: sltu a3, a0, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a1
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a1, v0.t
+; CHECK-NEXT: bltu a0, a2, .LBB118_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vssub_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a2
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v16, v16, a2
+; CHECK-NEXT: bltu a0, a1, .LBB119_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB119_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vssub_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssub_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vssub_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssub_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vssub_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssub_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vssub_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssub_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssub.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssub_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssub_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssub_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssub.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
new file mode 100644
index 0000000..e5589ce
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -0,0 +1,2065 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i7> @vssubu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 127
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vand.vx v9, v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i7> %v
+}
+
+declare <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vssubu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vssubu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vssubu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vssubu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i8> @vssubu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vssubu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv3i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vssubu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vssubu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vssubu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vssubu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vssubu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vssubu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vssubu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vssubu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vssubu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vssubu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+; Test that split-legalization works when the mask itself needs splitting.
+
+declare <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i8> @vssubu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub a0, a1, a2
+; CHECK-NEXT: sltu a3, a1, a0
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: bltu a1, a2, .LBB50_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB50_2:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vssubu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv128i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a2
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a2
+; CHECK-NEXT: bltu a0, a1, .LBB51_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB51_2:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
+ %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+ %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer
+ %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl)
+ ret <vscale x 128 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vssubu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vssubu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vssubu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vssubu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vssubu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vssubu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vssubu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vssubu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vssubu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vssubu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vssubu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vssubu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vssubu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vssubu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vssubu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vssubu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vssubu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vssubu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vssubu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vssubu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vssubu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vssubu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+; Test that split-legalization works then the mask needs manual splitting.
+
+declare <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i32> @vssubu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub a1, a0, a2
+; CHECK-NEXT: sltu a3, a0, a1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a1
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a1, v0.t
+; CHECK-NEXT: bltu a0, a2, .LBB118_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: .LBB118_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+define <vscale x 32 x i32> @vssubu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv32i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: sltu a3, a0, a2
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a3, a3, a2
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v16, v16, a2
+; CHECK-NEXT: bltu a0, a1, .LBB119_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB119_2:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vssubu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v9, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v9, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vssubu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vssubu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v10, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v10
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vssubu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vssubu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v12, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v12, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v12
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vssubu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vssubu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vssubu_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vssubu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vssubu_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vssubu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vssubu_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vssubu.vx v8, v8, a1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index e01984b..e07e520 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1606,23 +1606,255 @@ define i32 @select_cst_unknown(i32 signext %a, i32 signext %b) {
; RV64IMXVTCONDOPS-LABEL: select_cst_unknown:
; RV64IMXVTCONDOPS: # %bb.0:
; RV64IMXVTCONDOPS-NEXT: slt a0, a0, a1
-; RV64IMXVTCONDOPS-NEXT: li a1, -7
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a1, a1, a0
-; RV64IMXVTCONDOPS-NEXT: li a2, 5
-; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a2, a0
-; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: li a1, -12
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 5
; RV64IMXVTCONDOPS-NEXT: ret
;
; CHECKZICOND-LABEL: select_cst_unknown:
; CHECKZICOND: # %bb.0:
; CHECKZICOND-NEXT: slt a0, a0, a1
-; CHECKZICOND-NEXT: li a1, -7
-; CHECKZICOND-NEXT: czero.nez a1, a1, a0
-; CHECKZICOND-NEXT: li a2, 5
-; CHECKZICOND-NEXT: czero.eqz a0, a2, a0
-; CHECKZICOND-NEXT: or a0, a0, a1
+; CHECKZICOND-NEXT: li a1, -12
+; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 5
; CHECKZICOND-NEXT: ret
%cond = icmp slt i32 %a, %b
%ret = select i1 %cond, i32 5, i32 -7
ret i32 %ret
}
+
+define i32 @select_cst1(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst1:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 10
+; RV32IM-NEXT: bnez a1, .LBB43_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: li a0, 20
+; RV32IM-NEXT: .LBB43_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst1:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 10
+; RV64IM-NEXT: bnez a1, .LBB43_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: li a0, 20
+; RV64IM-NEXT: .LBB43_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst1:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 10
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst1:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 10
+; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 10
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 10, i32 20
+ ret i32 %ret
+}
+
+define i32 @select_cst2(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst2:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 10
+; RV32IM-NEXT: bnez a1, .LBB44_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 5
+; RV32IM-NEXT: addi a0, a0, -480
+; RV32IM-NEXT: .LBB44_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst2:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 10
+; RV64IM-NEXT: bnez a1, .LBB44_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 5
+; RV64IM-NEXT: addiw a0, a0, -480
+; RV64IM-NEXT: .LBB44_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst2:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: lui a1, 5
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -490
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; RV32IMZICOND-LABEL: select_cst2:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: lui a1, 5
+; RV32IMZICOND-NEXT: addi a1, a1, -490
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: addi a0, a0, 10
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst2:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: lui a1, 5
+; RV64IMZICOND-NEXT: addiw a1, a1, -490
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: addi a0, a0, 10
+; RV64IMZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 10, i32 20000
+ ret i32 %ret
+}
+
+define i32 @select_cst3(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst3:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: bnez a0, .LBB45_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 5
+; RV32IM-NEXT: addi a0, a0, -480
+; RV32IM-NEXT: ret
+; RV32IM-NEXT: .LBB45_2:
+; RV32IM-NEXT: lui a0, 7
+; RV32IM-NEXT: addi a0, a0, 1328
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst3:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: bnez a0, .LBB45_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 5
+; RV64IM-NEXT: addiw a0, a0, -480
+; RV64IM-NEXT: ret
+; RV64IM-NEXT: .LBB45_2:
+; RV64IM-NEXT: lui a0, 7
+; RV64IM-NEXT: addiw a0, a0, 1328
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst3:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: lui a1, 1048574
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -1808
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: lui a1, 7
+; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, 1328
+; RV64IMXVTCONDOPS-NEXT: add a0, a0, a1
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; RV32IMZICOND-LABEL: select_cst3:
+; RV32IMZICOND: # %bb.0:
+; RV32IMZICOND-NEXT: lui a1, 1048574
+; RV32IMZICOND-NEXT: addi a1, a1, -1808
+; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: lui a1, 7
+; RV32IMZICOND-NEXT: addi a1, a1, 1328
+; RV32IMZICOND-NEXT: add a0, a0, a1
+; RV32IMZICOND-NEXT: ret
+;
+; RV64IMZICOND-LABEL: select_cst3:
+; RV64IMZICOND: # %bb.0:
+; RV64IMZICOND-NEXT: lui a1, 1048574
+; RV64IMZICOND-NEXT: addiw a1, a1, -1808
+; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: lui a1, 7
+; RV64IMZICOND-NEXT: addiw a1, a1, 1328
+; RV64IMZICOND-NEXT: add a0, a0, a1
+; RV64IMZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 30000, i32 20000
+ ret i32 %ret
+}
+
+define i32 @select_cst4(i1 zeroext %cond) {
+; CHECK-LABEL: select_cst4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: xori a0, a0, 2047
+; CHECK-NEXT: ret
+ %ret = select i1 %cond, i32 -2048, i32 2047
+ ret i32 %ret
+}
+
+define i32 @select_cst5(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst5:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: mv a1, a0
+; RV32IM-NEXT: li a0, 2047
+; RV32IM-NEXT: bnez a1, .LBB47_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: lui a0, 1
+; RV32IM-NEXT: addi a0, a0, -2047
+; RV32IM-NEXT: .LBB47_2:
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst5:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: mv a1, a0
+; RV64IM-NEXT: li a0, 2047
+; RV64IM-NEXT: bnez a1, .LBB47_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: lui a0, 1
+; RV64IM-NEXT: addiw a0, a0, -2047
+; RV64IM-NEXT: .LBB47_2:
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst5:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 2
+; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst5:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 2
+; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 2047
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 2047, i32 2049
+ ret i32 %ret
+}
+
+define i32 @select_cst6(i1 zeroext %cond) {
+; RV32IM-LABEL: select_cst6:
+; RV32IM: # %bb.0:
+; RV32IM-NEXT: bnez a0, .LBB48_2
+; RV32IM-NEXT: # %bb.1:
+; RV32IM-NEXT: li a0, 2047
+; RV32IM-NEXT: ret
+; RV32IM-NEXT: .LBB48_2:
+; RV32IM-NEXT: lui a0, 1
+; RV32IM-NEXT: addi a0, a0, -2047
+; RV32IM-NEXT: ret
+;
+; RV64IM-LABEL: select_cst6:
+; RV64IM: # %bb.0:
+; RV64IM-NEXT: bnez a0, .LBB48_2
+; RV64IM-NEXT: # %bb.1:
+; RV64IM-NEXT: li a0, 2047
+; RV64IM-NEXT: ret
+; RV64IM-NEXT: .LBB48_2:
+; RV64IM-NEXT: lui a0, 1
+; RV64IM-NEXT: addiw a0, a0, -2047
+; RV64IM-NEXT: ret
+;
+; RV64IMXVTCONDOPS-LABEL: select_cst6:
+; RV64IMXVTCONDOPS: # %bb.0:
+; RV64IMXVTCONDOPS-NEXT: li a1, 2
+; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
+; RV64IMXVTCONDOPS-NEXT: ret
+;
+; CHECKZICOND-LABEL: select_cst6:
+; CHECKZICOND: # %bb.0:
+; CHECKZICOND-NEXT: li a1, 2
+; CHECKZICOND-NEXT: czero.eqz a0, a1, a0
+; CHECKZICOND-NEXT: addi a0, a0, 2047
+; CHECKZICOND-NEXT: ret
+ %ret = select i1 %cond, i32 2049, i32 2047
+ ret i32 %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index a2a953c..87f2a63 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -871,3 +871,64 @@ define void @zext_nneg_dominating_icmp_i32_zeroext(i16 signext %0) {
5:
ret void
}
+
+; The load is used extended and non-extended in the successor basic block. The
+; signed compare will cause the non-extended value to exported out of the first
+; basic block using a sext to XLen. We need to CSE the zext nneg with the sext
+; so that we can form a sextload.
+define void @load_zext_nneg_sext_cse(ptr %p) nounwind {
+; RV32I-LABEL: load_zext_nneg_sext_cse:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: lh s0, 0(a0)
+; RV32I-NEXT: bltz s0, .LBB50_2
+; RV32I-NEXT: # %bb.1: # %bb1
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: call bar_i16
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: tail bar_i32
+; RV32I-NEXT: .LBB50_2: # %bb2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64-LABEL: load_zext_nneg_sext_cse:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: lh s0, 0(a0)
+; RV64-NEXT: bltz s0, .LBB50_2
+; RV64-NEXT: # %bb.1: # %bb1
+; RV64-NEXT: mv a0, s0
+; RV64-NEXT: call bar_i16
+; RV64-NEXT: mv a0, s0
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: tail bar_i32
+; RV64-NEXT: .LBB50_2: # %bb2
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %load = load i16, ptr %p
+ %zext = zext nneg i16 %load to i32
+ %cmp = icmp sgt i16 %load, -1
+ br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+ tail call void @bar_i16(i16 signext %load)
+ tail call void @bar_i32(i32 signext %zext)
+ br label %bb2
+
+bb2:
+ ret void
+}
+declare void @bar_i16(i16);
diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll
index fc35bc4..8d065da 100644
--- a/llvm/test/CodeGen/RISCV/split-offsets.ll
+++ b/llvm/test/CodeGen/RISCV/split-offsets.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV32I
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I
; Check that memory accesses to array elements with large offsets have those
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index ec6e978..7fc4713 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
-; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32IM %s
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
-; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64IM %s
define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index eea8e64..540883fdc 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
-; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s
-; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s