diff options
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
18 files changed, 15767 insertions, 191 deletions
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index 91e7399..3c2e846 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ -; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I %s +; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -code-model=medium < %s \ -; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV32I-MEDIUM %s +; RUN: | FileCheck -check-prefix=RV32I-MEDIUM %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ -; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I %s +; RUN: | FileCheck -check-prefix=RV64I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -code-model=medium < %s \ -; RUN: -riscv-enable-sink-fold | FileCheck -check-prefix=RV64I-MEDIUM %s +; RUN: | FileCheck -check-prefix=RV64I-MEDIUM %s ; We can often fold an ADDI into the offset of load/store instructions: ; (load (addi base, off1), off2) -> (load base, off1+off2) diff --git a/llvm/test/CodeGen/RISCV/make-compressible.mir b/llvm/test/CodeGen/RISCV/make-compressible.mir index 2105a13..03da38a 100644 --- a/llvm/test/CodeGen/RISCV/make-compressible.mir +++ b/llvm/test/CodeGen/RISCV/make-compressible.mir @@ -1,8 +1,14 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+f,+d -simplify-mir \ -# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV32 %s +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32C %s # RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+f,+d -simplify-mir \ -# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefix=RV64 %s +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV64,RV64C %s +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+d,+zcf -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32ZCF %s +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+d,+zca -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32,RV32ZCA %s +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+d,+zca -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV64,RV64ZCA %s --- | define void @store_common_value(ptr %a, ptr %b, ptr %c) #0 { @@ -288,7 +294,7 @@ ret { double, double } %3 } - attributes #0 = { minsize "target-features"="+c,+f,+d" } + attributes #0 = { minsize } ... --- @@ -306,6 +312,7 @@ body: | ; RV32-NEXT: SW $x13, killed renamable $x11, 0 :: (store (s32) into %ir.b) ; RV32-NEXT: SW $x13, killed renamable $x12, 0 :: (store (s32) into %ir.c) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_value ; RV64: liveins: $x10, $x11, $x12 ; RV64-NEXT: {{ $}} @@ -327,14 +334,15 @@ body: | bb.0.entry: liveins: $x10, $x11, $x12, $f16_f - ; RV32-LABEL: name: store_common_value_float - ; RV32: liveins: $x10, $x11, $x12, $f16_f - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f - ; RV32-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) - ; RV32-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) - ; RV32-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) - ; RV32-NEXT: PseudoRET + ; RV32C-LABEL: name: store_common_value_float + ; RV32C: liveins: $x10, $x11, $x12, $f16_f + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f + ; RV32C-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32C-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV32C-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV32C-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_value_float ; RV64: liveins: $x10, $x11, $x12, $f16_f ; RV64-NEXT: {{ $}} @@ -342,6 +350,23 @@ body: | ; RV64-NEXT: FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) ; RV64-NEXT: FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) ; RV64-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_common_value_float + ; RV32ZCF: liveins: $x10, $x11, $x12, $f16_f + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: $f15_f = FSGNJ_S $f16_f, $f16_f + ; RV32ZCF-NEXT: FSW $f15_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32ZCF-NEXT: FSW $f15_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV32ZCF-NEXT: FSW killed $f15_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_common_value_float + ; RV32ZCA: liveins: $x10, $x11, $x12, $f16_f + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; RV32ZCA-NEXT: FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; RV32ZCA-NEXT: FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; RV32ZCA-NEXT: PseudoRET FSW renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) FSW renamable $f16_f, killed renamable $x11, 0 :: (store (s32) into %ir.b) FSW killed renamable $f16_f, killed renamable $x12, 0 :: (store (s32) into %ir.c) @@ -355,22 +380,47 @@ body: | bb.0.entry: liveins: $x10, $x11, $x12, $f16_d - ; RV32-LABEL: name: store_common_value_double - ; RV32: liveins: $x10, $x11, $x12, $f16_d - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d - ; RV32-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) - ; RV32-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) - ; RV32-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) - ; RV32-NEXT: PseudoRET - ; RV64-LABEL: name: store_common_value_double - ; RV64: liveins: $x10, $x11, $x12, $f16_d - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d - ; RV64-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) - ; RV64-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) - ; RV64-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) - ; RV64-NEXT: PseudoRET + ; RV32C-LABEL: name: store_common_value_double + ; RV32C: liveins: $x10, $x11, $x12, $f16_d + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d + ; RV32C-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV32C-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV32C-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV32C-NEXT: PseudoRET + ; + ; RV64C-LABEL: name: store_common_value_double + ; RV64C: liveins: $x10, $x11, $x12, $f16_d + ; RV64C-NEXT: {{ $}} + ; RV64C-NEXT: $f15_d = FSGNJ_D $f16_d, $f16_d + ; RV64C-NEXT: FSD $f15_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV64C-NEXT: FSD $f15_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV64C-NEXT: FSD killed $f15_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV64C-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_common_value_double + ; RV32ZCF: liveins: $x10, $x11, $x12, $f16_d + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV32ZCF-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV32ZCF-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_common_value_double + ; RV32ZCA: liveins: $x10, $x11, $x12, $f16_d + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV32ZCA-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV32ZCA-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV32ZCA-NEXT: PseudoRET + ; + ; RV64ZCA-LABEL: name: store_common_value_double + ; RV64ZCA: liveins: $x10, $x11, $x12, $f16_d + ; RV64ZCA-NEXT: {{ $}} + ; RV64ZCA-NEXT: FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) + ; RV64ZCA-NEXT: FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) + ; RV64ZCA-NEXT: FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) + ; RV64ZCA-NEXT: PseudoRET FSD renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) FSD renamable $f16_d, killed renamable $x11, 0 :: (store (s64) into %ir.b) FSD killed renamable $f16_d, killed renamable $x12, 0 :: (store (s64) into %ir.c) @@ -395,6 +445,7 @@ body: | ; RV32-NEXT: renamable $x10 = ADDI $x0, 5 ; RV32-NEXT: SW killed renamable $x10, killed $x11, 0 :: (volatile store (s32) into %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -432,6 +483,7 @@ body: | ; RV32-NEXT: SW killed renamable $x10, $x11, 0 :: (volatile store (s32) into %ir.p) ; RV32-NEXT: SW killed $x11, $x11, 0 :: (volatile store (s32) into %ir.q) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr_self ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -457,14 +509,15 @@ body: | bb.0.entry: liveins: $x16, $f10_f, $f11_f, $f12_f - ; RV32-LABEL: name: store_common_ptr_float - ; RV32: liveins: $x16, $f10_f, $f11_f, $f12_f - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x10 = ADDI $x16, 0 - ; RV32-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p) - ; RV32-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p) - ; RV32-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p) - ; RV32-NEXT: PseudoRET + ; RV32C-LABEL: name: store_common_ptr_float + ; RV32C: liveins: $x16, $f10_f, $f11_f, $f12_f + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x10 = ADDI $x16, 0 + ; RV32C-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32C-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32C-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32C-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr_float ; RV64: liveins: $x16, $f10_f, $f11_f, $f12_f ; RV64-NEXT: {{ $}} @@ -472,6 +525,23 @@ body: | ; RV64-NEXT: FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) ; RV64-NEXT: FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) ; RV64-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_common_ptr_float + ; RV32ZCF: liveins: $x16, $f10_f, $f11_f, $f12_f + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: $x10 = ADDI $x16, 0 + ; RV32ZCF-NEXT: FSW killed renamable $f10_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCF-NEXT: FSW killed renamable $f11_f, $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCF-NEXT: FSW killed renamable $f12_f, killed $x10, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_common_ptr_float + ; RV32ZCA: liveins: $x16, $f10_f, $f11_f, $f12_f + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCA-NEXT: FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCA-NEXT: FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; RV32ZCA-NEXT: PseudoRET FSW killed renamable $f10_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) FSW killed renamable $f11_f, renamable $x16, 0 :: (volatile store (s32) into %ir.p) FSW killed renamable $f12_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) @@ -485,22 +555,47 @@ body: | bb.0.entry: liveins: $x16, $f10_d, $f11_d, $f12_d - ; RV32-LABEL: name: store_common_ptr_double - ; RV32: liveins: $x16, $f10_d, $f11_d, $f12_d - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x10 = ADDI $x16, 0 - ; RV32-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV32-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV32-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV32-NEXT: PseudoRET - ; RV64-LABEL: name: store_common_ptr_double - ; RV64: liveins: $x16, $f10_d, $f11_d, $f12_d - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: $x10 = ADDI $x16, 0 - ; RV64-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV64-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV64-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) - ; RV64-NEXT: PseudoRET + ; RV32C-LABEL: name: store_common_ptr_double + ; RV32C: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x10 = ADDI $x16, 0 + ; RV32C-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32C-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32C-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV32C-NEXT: PseudoRET + ; + ; RV64C-LABEL: name: store_common_ptr_double + ; RV64C: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV64C-NEXT: {{ $}} + ; RV64C-NEXT: $x10 = ADDI $x16, 0 + ; RV64C-NEXT: FSD killed renamable $f10_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64C-NEXT: FSD killed renamable $f11_d, $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64C-NEXT: FSD killed renamable $f12_d, killed $x10, 0 :: (volatile store (s64) into %ir.p) + ; RV64C-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_common_ptr_double + ; RV32ZCF: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCF-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCF-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_common_ptr_double + ; RV32ZCA: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCA-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCA-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV32ZCA-NEXT: PseudoRET + ; + ; RV64ZCA-LABEL: name: store_common_ptr_double + ; RV64ZCA: liveins: $x16, $f10_d, $f11_d, $f12_d + ; RV64ZCA-NEXT: {{ $}} + ; RV64ZCA-NEXT: FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV64ZCA-NEXT: FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV64ZCA-NEXT: FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) + ; RV64ZCA-NEXT: PseudoRET FSD killed renamable $f10_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) FSD killed renamable $f11_d, renamable $x16, 0 :: (volatile store (s64) into %ir.p) FSD killed renamable $f12_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) @@ -522,6 +617,7 @@ body: | ; RV32-NEXT: dead renamable $x10 = LW $x11, 0 :: (volatile load (s32) from %ir.p) ; RV32-NEXT: dead renamable $x10 = LW killed $x11, 0 :: (volatile load (s32) from %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: load_common_ptr ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -543,14 +639,15 @@ body: | bb.0.entry: liveins: $x16 - ; RV32-LABEL: name: load_common_ptr_float - ; RV32: liveins: $x16 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x10 = ADDI $x16, 0 - ; RV32-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g) - ; RV32-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1) - ; RV32-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2) - ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; RV32C-LABEL: name: load_common_ptr_float + ; RV32C: liveins: $x16 + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x10 = ADDI $x16, 0 + ; RV32C-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g) + ; RV32C-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1) + ; RV32C-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2) + ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; ; RV64-LABEL: name: load_common_ptr_float ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -558,6 +655,23 @@ body: | ; RV64-NEXT: renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) ; RV64-NEXT: renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; + ; RV32ZCF-LABEL: name: load_common_ptr_float + ; RV32ZCF: liveins: $x16 + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: $x10 = ADDI $x16, 0 + ; RV32ZCF-NEXT: renamable $f10_f = FLW $x10, 0 :: (load (s32) from %ir.g) + ; RV32ZCF-NEXT: renamable $f11_f = FLW $x10, 4 :: (load (s32) from %ir.arrayidx1) + ; RV32ZCF-NEXT: renamable $f12_f = FLW killed $x10, 8 :: (load (s32) from %ir.arrayidx2) + ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; + ; RV32ZCA-LABEL: name: load_common_ptr_float + ; RV32ZCA: liveins: $x16 + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g) + ; RV32ZCA-NEXT: renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) + ; RV32ZCA-NEXT: renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) + ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f renamable $f10_f = FLW renamable $x16, 0 :: (load (s32) from %ir.g) renamable $f11_f = FLW renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) renamable $f12_f = FLW killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) @@ -571,22 +685,47 @@ body: | bb.0.entry: liveins: $x16 - ; RV32-LABEL: name: load_common_ptr_double - ; RV32: liveins: $x16 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x10 = ADDI $x16, 0 - ; RV32-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) - ; RV32-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) - ; RV32-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) - ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d - ; RV64-LABEL: name: load_common_ptr_double - ; RV64: liveins: $x16 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: $x10 = ADDI $x16, 0 - ; RV64-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) - ; RV64-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) - ; RV64-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) - ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; RV32C-LABEL: name: load_common_ptr_double + ; RV32C: liveins: $x16 + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x10 = ADDI $x16, 0 + ; RV32C-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) + ; RV32C-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) + ; RV32C-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) + ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV64C-LABEL: name: load_common_ptr_double + ; RV64C: liveins: $x16 + ; RV64C-NEXT: {{ $}} + ; RV64C-NEXT: $x10 = ADDI $x16, 0 + ; RV64C-NEXT: renamable $f10_d = FLD $x10, 0 :: (load (s64) from %ir.g) + ; RV64C-NEXT: renamable $f11_d = FLD $x10, 8 :: (load (s64) from %ir.arrayidx1) + ; RV64C-NEXT: renamable $f12_d = FLD killed $x10, 16 :: (load (s64) from %ir.arrayidx2) + ; RV64C-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV32ZCF-LABEL: name: load_common_ptr_double + ; RV32ZCF: liveins: $x16 + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g) + ; RV32ZCF-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1) + ; RV32ZCF-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2) + ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV32ZCA-LABEL: name: load_common_ptr_double + ; RV32ZCA: liveins: $x16 + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g) + ; RV32ZCA-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1) + ; RV32ZCA-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2) + ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV64ZCA-LABEL: name: load_common_ptr_double + ; RV64ZCA: liveins: $x16 + ; RV64ZCA-NEXT: {{ $}} + ; RV64ZCA-NEXT: renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g) + ; RV64ZCA-NEXT: renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1) + ; RV64ZCA-NEXT: renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2) + ; RV64ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d renamable $f10_d = FLD renamable $x16, 0 :: (load (s64) from %ir.g) renamable $f11_d = FLD renamable $x16, 8 :: (load (s64) from %ir.arrayidx1) renamable $f12_d = FLD killed renamable $x16, 16 :: (load (s64) from %ir.arrayidx2) @@ -613,6 +752,7 @@ body: | ; RV32-NEXT: renamable $x11 = ADDI $x0, 7 ; RV32-NEXT: SW killed renamable $x11, killed $x12, 28 :: (volatile store (s32) into %ir.3) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_large_offset ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -644,15 +784,16 @@ body: | bb.0.entry: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f - ; RV32-LABEL: name: store_large_offset_float - ; RV32: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x11 = ADDI $x10, 384 - ; RV32-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0) - ; RV32-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1) - ; RV32-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2) - ; RV32-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3) - ; RV32-NEXT: PseudoRET + ; RV32C-LABEL: name: store_large_offset_float + ; RV32C: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x11 = ADDI $x10, 384 + ; RV32C-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0) + ; RV32C-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1) + ; RV32C-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2) + ; RV32C-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3) + ; RV32C-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_large_offset_float ; RV64: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f ; RV64-NEXT: {{ $}} @@ -661,6 +802,25 @@ body: | ; RV64-NEXT: FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2) ; RV64-NEXT: FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) ; RV64-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_large_offset_float + ; RV32ZCF: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: $x11 = ADDI $x10, 384 + ; RV32ZCF-NEXT: FSW killed renamable $f10_f, $x11, 16 :: (volatile store (s32) into %ir.0) + ; RV32ZCF-NEXT: FSW killed renamable $f11_f, $x11, 20 :: (volatile store (s32) into %ir.1) + ; RV32ZCF-NEXT: FSW killed renamable $f12_f, $x11, 24 :: (volatile store (s32) into %ir.2) + ; RV32ZCF-NEXT: FSW killed renamable $f13_f, killed $x11, 28 :: (volatile store (s32) into %ir.3) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_large_offset_float + ; RV32ZCA: liveins: $x10, $f10_f, $f11_f, $f12_f, $f13_f + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; RV32ZCA-NEXT: FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; RV32ZCA-NEXT: FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2) + ; RV32ZCA-NEXT: FSW killed renamable $f13_f, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) + ; RV32ZCA-NEXT: PseudoRET FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) FSW killed renamable $f11_f, renamable $x10, 404 :: (volatile store (s32) into %ir.1) FSW killed renamable $f12_f, renamable $x10, 408 :: (volatile store (s32) into %ir.2) @@ -675,24 +835,52 @@ body: | bb.0.entry: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d - ; RV32-LABEL: name: store_large_offset_double - ; RV32: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x11 = ADDI $x10, 768 - ; RV32-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) - ; RV32-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) - ; RV32-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) - ; RV32-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) - ; RV32-NEXT: PseudoRET - ; RV64-LABEL: name: store_large_offset_double - ; RV64: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: $x11 = ADDI $x10, 768 - ; RV64-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) - ; RV64-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) - ; RV64-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) - ; RV64-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) - ; RV64-NEXT: PseudoRET + ; RV32C-LABEL: name: store_large_offset_double + ; RV32C: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x11 = ADDI $x10, 768 + ; RV32C-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) + ; RV32C-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) + ; RV32C-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) + ; RV32C-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) + ; RV32C-NEXT: PseudoRET + ; + ; RV64C-LABEL: name: store_large_offset_double + ; RV64C: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV64C-NEXT: {{ $}} + ; RV64C-NEXT: $x11 = ADDI $x10, 768 + ; RV64C-NEXT: FSD killed renamable $f10_d, $x11, 32 :: (volatile store (s64) into %ir.0) + ; RV64C-NEXT: FSD killed renamable $f11_d, $x11, 40 :: (volatile store (s64) into %ir.1) + ; RV64C-NEXT: FSD killed renamable $f12_d, $x11, 48 :: (volatile store (s64) into %ir.2) + ; RV64C-NEXT: FSD killed renamable $f13_d, killed $x11, 56 :: (volatile store (s64) into %ir.3) + ; RV64C-NEXT: PseudoRET + ; + ; RV32ZCF-LABEL: name: store_large_offset_double + ; RV32ZCF: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; RV32ZCF-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; RV32ZCF-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2) + ; RV32ZCF-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3) + ; RV32ZCF-NEXT: PseudoRET + ; + ; RV32ZCA-LABEL: name: store_large_offset_double + ; RV32ZCA: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; RV32ZCA-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; RV32ZCA-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2) + ; RV32ZCA-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3) + ; RV32ZCA-NEXT: PseudoRET + ; + ; RV64ZCA-LABEL: name: store_large_offset_double + ; RV64ZCA: liveins: $x10, $f10_d, $f11_d, $f12_d, $f13_d + ; RV64ZCA-NEXT: {{ $}} + ; RV64ZCA-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) + ; RV64ZCA-NEXT: FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1) + ; RV64ZCA-NEXT: FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2) + ; RV64ZCA-NEXT: FSD killed renamable $f13_d, killed renamable $x10, 824 :: (volatile store (s64) into %ir.3) + ; RV64ZCA-NEXT: PseudoRET FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) FSD killed renamable $f11_d, renamable $x10, 808 :: (volatile store (s64) into %ir.1) FSD killed renamable $f12_d, renamable $x10, 816 :: (volatile store (s64) into %ir.2) @@ -716,6 +904,7 @@ body: | ; RV32-NEXT: dead renamable $x11 = LW $x12, 24 :: (volatile load (s32) from %ir.2) ; RV32-NEXT: dead renamable $x10 = LW killed $x12, 28 :: (volatile load (s32) from %ir.3) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: load_large_offset ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -739,14 +928,15 @@ body: | bb.0.entry: liveins: $x10 - ; RV32-LABEL: name: load_large_offset_float - ; RV32: liveins: $x10 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x11 = ADDI $x10, 384 - ; RV32-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx) - ; RV32-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1) - ; RV32-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2) - ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; RV32C-LABEL: name: load_large_offset_float + ; RV32C: liveins: $x10 + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x11 = ADDI $x10, 384 + ; RV32C-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx) + ; RV32C-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1) + ; RV32C-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2) + ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; ; RV64-LABEL: name: load_large_offset_float ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -754,6 +944,23 @@ body: | ; RV64-NEXT: renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) ; RV64-NEXT: renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; + ; RV32ZCF-LABEL: name: load_large_offset_float + ; RV32ZCF: liveins: $x10 + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: $x11 = ADDI $x10, 384 + ; RV32ZCF-NEXT: renamable $f10_f = FLW $x11, 16 :: (load (s32) from %ir.arrayidx) + ; RV32ZCF-NEXT: renamable $f11_f = FLW $x11, 20 :: (load (s32) from %ir.arrayidx1) + ; RV32ZCF-NEXT: renamable $f12_f = FLW killed $x11, 24 :: (load (s32) from %ir.arrayidx2) + ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f + ; + ; RV32ZCA-LABEL: name: load_large_offset_float + ; RV32ZCA: liveins: $x10 + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + ; RV32ZCA-NEXT: renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + ; RV32ZCA-NEXT: renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) + ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $f10_f, implicit $f11_f, implicit $f12_f renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) renamable $f11_f = FLW renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) renamable $f12_f = FLW killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) @@ -767,22 +974,47 @@ body: | bb.0.entry: liveins: $x10 - ; RV32-LABEL: name: load_large_offset_double - ; RV32: liveins: $x10 - ; RV32-NEXT: {{ $}} - ; RV32-NEXT: $x11 = ADDI $x10, 768 - ; RV32-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) - ; RV32-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) - ; RV32-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) - ; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d - ; RV64-LABEL: name: load_large_offset_double - ; RV64: liveins: $x10 - ; RV64-NEXT: {{ $}} - ; RV64-NEXT: $x11 = ADDI $x10, 768 - ; RV64-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) - ; RV64-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) - ; RV64-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) - ; RV64-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; RV32C-LABEL: name: load_large_offset_double + ; RV32C: liveins: $x10 + ; RV32C-NEXT: {{ $}} + ; RV32C-NEXT: $x11 = ADDI $x10, 768 + ; RV32C-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) + ; RV32C-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) + ; RV32C-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) + ; RV32C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV64C-LABEL: name: load_large_offset_double + ; RV64C: liveins: $x10 + ; RV64C-NEXT: {{ $}} + ; RV64C-NEXT: $x11 = ADDI $x10, 768 + ; RV64C-NEXT: renamable $f10_d = FLD $x11, 32 :: (load (s64) from %ir.arrayidx) + ; RV64C-NEXT: renamable $f11_d = FLD $x11, 40 :: (load (s64) from %ir.arrayidx1) + ; RV64C-NEXT: renamable $f12_d = FLD killed $x11, 48 :: (load (s64) from %ir.arrayidx2) + ; RV64C-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV32ZCF-LABEL: name: load_large_offset_double + ; RV32ZCF: liveins: $x10 + ; RV32ZCF-NEXT: {{ $}} + ; RV32ZCF-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + ; RV32ZCF-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + ; RV32ZCF-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2) + ; RV32ZCF-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV32ZCA-LABEL: name: load_large_offset_double + ; RV32ZCA: liveins: $x10 + ; RV32ZCA-NEXT: {{ $}} + ; RV32ZCA-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + ; RV32ZCA-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + ; RV32ZCA-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2) + ; RV32ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d + ; + ; RV64ZCA-LABEL: name: load_large_offset_double + ; RV64ZCA: liveins: $x10 + ; RV64ZCA-NEXT: {{ $}} + ; RV64ZCA-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) + ; RV64ZCA-NEXT: renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) + ; RV64ZCA-NEXT: renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2) + ; RV64ZCA-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, implicit $x2, implicit $f10_d, implicit $f11_d, implicit $f12_d renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) renamable $f11_d = FLD renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) renamable $f12_d = FLD killed renamable $x10, 816 :: (load (s64) from %ir.arrayidx2) @@ -801,6 +1033,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: SW $x0, killed renamable $x10, 0 :: (store (s32) into %ir.a) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_value_no_opt ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -822,6 +1055,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: FSW killed renamable $f16_f, killed renamable $x10, 0 :: (store (s32) into %ir.a) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_value_float_no_opt ; RV64: liveins: $x10, $f16_f ; RV64-NEXT: {{ $}} @@ -843,6 +1077,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: FSD killed renamable $f16_d, killed renamable $x10, 0 :: (store (s64) into %ir.a) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_value_double_no_opt ; RV64: liveins: $x10, $f16_d ; RV64-NEXT: {{ $}} @@ -865,6 +1100,7 @@ body: | ; RV32-NEXT: renamable $x10 = ADDI $x0, 1 ; RV32-NEXT: SW killed renamable $x10, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr_no_opt ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -888,6 +1124,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: FSW killed renamable $f10_f, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr_float_no_opt ; RV64: liveins: $x16, $f10_f ; RV64-NEXT: {{ $}} @@ -909,6 +1146,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: FSD killed renamable $f10_d, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_common_ptr_double_no_opt ; RV64: liveins: $x16, $f10_d ; RV64-NEXT: {{ $}} @@ -930,6 +1168,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: dead renamable $x10 = LW killed renamable $x16, 0 :: (volatile load (s32) from %ir.p) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: load_common_ptr_no_opt ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -951,6 +1190,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: renamable $f10_f = FLW killed renamable $x16, 0 :: (load (s32) from %ir.g) ; RV32-NEXT: PseudoRET implicit $f10_f + ; ; RV64-LABEL: name: load_common_ptr_float_no_opt ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -972,6 +1212,7 @@ body: | ; RV32-NEXT: {{ $}} ; RV32-NEXT: renamable $f10_d = FLD killed renamable $x16, 0 :: (load (s64) from %ir.g) ; RV32-NEXT: PseudoRET implicit $f10_d + ; ; RV64-LABEL: name: load_common_ptr_double_no_opt ; RV64: liveins: $x16 ; RV64-NEXT: {{ $}} @@ -996,6 +1237,7 @@ body: | ; RV32-NEXT: renamable $x11 = ADDI $x0, 3 ; RV32-NEXT: SW killed renamable $x11, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_large_offset_no_opt ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -1024,6 +1266,7 @@ body: | ; RV32-NEXT: FSW killed renamable $f10_f, renamable $x10, 400 :: (volatile store (s32) into %ir.0) ; RV32-NEXT: FSW killed renamable $f11_f, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_large_offset_float_no_opt ; RV64: liveins: $x10, $f10_f, $f11_f ; RV64-NEXT: {{ $}} @@ -1048,6 +1291,7 @@ body: | ; RV32-NEXT: FSD killed renamable $f10_d, renamable $x10, 800 :: (volatile store (s64) into %ir.0) ; RV32-NEXT: FSD killed renamable $f11_d, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: store_large_offset_double_no_opt ; RV64: liveins: $x10, $f10_d, $f11_d ; RV64-NEXT: {{ $}} @@ -1072,6 +1316,7 @@ body: | ; RV32-NEXT: dead renamable $x11 = LW renamable $x10, 400 :: (volatile load (s32) from %ir.0) ; RV32-NEXT: dead renamable $x10 = LW killed renamable $x10, 404 :: (volatile load (s32) from %ir.1) ; RV32-NEXT: PseudoRET + ; ; RV64-LABEL: name: load_large_offset_no_opt ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -1096,6 +1341,7 @@ body: | ; RV32-NEXT: renamable $f10_f = FLW renamable $x10, 400 :: (load (s32) from %ir.arrayidx) ; RV32-NEXT: renamable $f11_f = FLW killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) ; RV32-NEXT: PseudoRET implicit $f10_f, implicit $f11_f + ; ; RV64-LABEL: name: load_large_offset_float_no_opt ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} @@ -1120,6 +1366,7 @@ body: | ; RV32-NEXT: renamable $f10_d = FLD renamable $x10, 800 :: (load (s64) from %ir.arrayidx) ; RV32-NEXT: renamable $f11_d = FLD killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1) ; RV32-NEXT: PseudoRET implicit $f10_d, implicit $f11_d + ; ; RV64-LABEL: name: load_large_offset_double_no_opt ; RV64: liveins: $x10 ; RV64-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/RISCV/pr51206.ll b/llvm/test/CodeGen/RISCV/pr51206.ll index f54031a..8aa145f 100644 --- a/llvm/test/CodeGen/RISCV/pr51206.ll +++ b/llvm/test/CodeGen/RISCV/pr51206.ll @@ -27,16 +27,12 @@ define signext i32 @wobble() nounwind { ; CHECK-NEXT: lui a2, %hi(global.3) ; CHECK-NEXT: li a3, 5 ; CHECK-NEXT: sw a1, %lo(global.3)(a2) -; CHECK-NEXT: bltu a0, a3, .LBB0_2 -; CHECK-NEXT: # %bb.1: # %bb10 -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: call quux -; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .LBB0_2: # %bb12 +; CHECK-NEXT: bgeu a0, a3, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb12 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %bb10 +; CHECK-NEXT: tail quux bb: %tmp = load i8, ptr @global, align 1 %tmp1 = zext i8 %tmp to i32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 1724b48..a09ab3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \ -; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \ -; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>) @@ -15086,31 +15086,27 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ret <32 x i64> %x } -; FIXME: This is a miscompile triggered by the mgather -> -; riscv.masked.strided.load combine. In order for it to trigger we need either a -; strided gather that RISCVGatherScatterLowering doesn't pick up, or a new -; strided gather generated by the widening sew combine. define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; RV32V-LABEL: masked_gather_widen_sew_negative_stride: ; RV32V: # %bb.0: -; RV32V-NEXT: addi a0, a0, -128 -; RV32V-NEXT: li a1, -128 +; RV32V-NEXT: addi a0, a0, 136 +; RV32V-NEXT: li a1, -136 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32V-NEXT: vlse64.v v8, (a0), a1 ; RV32V-NEXT: ret ; ; RV64V-LABEL: masked_gather_widen_sew_negative_stride: ; RV64V: # %bb.0: -; RV64V-NEXT: addi a0, a0, -128 -; RV64V-NEXT: li a1, -128 +; RV64V-NEXT: addi a0, a0, 136 +; RV64V-NEXT: li a1, -136 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vlse64.v v8, (a0), a1 ; RV64V-NEXT: ret ; ; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lui a1, 16392 -; RV32ZVE32F-NEXT: addi a1, a1, 1152 +; RV32ZVE32F-NEXT: lui a1, 16393 +; RV32ZVE32F-NEXT: addi a1, a1, -888 ; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vmv.s.x v9, a1 ; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9 @@ -15118,8 +15114,8 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; ; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: addi a1, a0, 128 -; RV64ZVE32F-NEXT: lw a2, 132(a0) +; RV64ZVE32F-NEXT: addi a1, a0, 136 +; RV64ZVE32F-NEXT: lw a2, 140(a0) ; RV64ZVE32F-NEXT: lw a3, 0(a0) ; RV64ZVE32F-NEXT: lw a0, 4(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -15128,7 +15124,7 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret - %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 32, i64 33, i64 0, i64 1> + %ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1> %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> shufflevector(<4 x i1> insertelement(<4 x i1> poison, i1 true, i32 0), <4 x i1> poison, <4 x i32> zeroinitializer), <4 x i32> poison) ret <4 x i32> %x } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll new file mode 100644 index 0000000..6c5dd04 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -0,0 +1,1701 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vsadd_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.sadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vsadd_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsadd_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vsadd_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vsadd_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsadd_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsadd_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsadd_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsadd_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsadd_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vsadd_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsadd_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vsadd_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsadd_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: addi a0, a1, -128 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vsadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -128 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vsadd_vi_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. + +define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vsadd_vi_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vsadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsadd_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vsadd_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsadd_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vsadd_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsadd_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vsadd_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsadd_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vsadd_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsadd_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vsadd_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsadd_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vsadd_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsadd_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vsadd_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsadd_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vsadd_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vsadd_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vsadd_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vsadd_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB108_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB109_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB109_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v24 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v16, v16, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB109_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB109_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vi v8, v8, -1 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsadd.vi v16, v16, -1 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +; FIXME: We don't match vsadd.vi on RV32. + +define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vsadd_vx_v32i64_evl12: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v32i64_evl12: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + ret <32 x i64> %v +} + +define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vsadd_vx_v32i64_evl27: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsadd.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_v32i64_evl27: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsadd.vi v8, v8, -1, v0.t +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll new file mode 100644 index 0000000..6227f8a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -0,0 +1,1697 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vsaddu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vminu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.uadd.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vsaddu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsaddu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsaddu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsaddu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsaddu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vsaddu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vsaddu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vsaddu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vsaddu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsaddu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsaddu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsaddu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsaddu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vsaddu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vsaddu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsaddu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsaddu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsaddu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsaddu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vsaddu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vsaddu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsaddu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsaddu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsaddu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsaddu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vsaddu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: addi a0, a1, -128 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vsaddu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -128 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vsaddu_vi_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. + +define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vsaddu_vi_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vsaddu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsaddu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsaddu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsaddu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsaddu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vsaddu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vsaddu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsaddu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsaddu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsaddu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsaddu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vsaddu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vsaddu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsaddu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsaddu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsaddu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsaddu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vsaddu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vsaddu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsaddu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsaddu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsaddu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsaddu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vsaddu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vsaddu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsaddu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsaddu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsaddu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsaddu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vsaddu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vsaddu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsaddu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsaddu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsaddu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsaddu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vsaddu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vsaddu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsaddu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsaddu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsaddu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsaddu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vsaddu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vsaddu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsaddu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsaddu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsaddu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsaddu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vsaddu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vsaddu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsaddu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsaddu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsaddu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vsaddu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vsaddu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsaddu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsaddu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsaddu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vsaddu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vsaddu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsaddu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsaddu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsaddu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vsaddu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vsaddu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsaddu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsaddu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsaddu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vsaddu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB108_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB109_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB109_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v24 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v16, v16, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB109_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB109_2: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vi v8, v8, -1 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vi v16, v16, -1 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +; FIXME: We don't match vsaddu.vi on RV32. + +define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vsaddu_vx_v32i64_evl12: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v32i64_evl12: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + ret <32 x i64> %v +} + +define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vsaddu_vx_v32i64_evl27: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vsaddu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_v32i64_evl27: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vi v8, v8, -1, v0.t +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll new file mode 100644 index 0000000..6360cf4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -0,0 +1,1745 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vssub_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vsra.vi v9, v9, 1 +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.ssub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vssub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vssub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vssub_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssub_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssub_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssub_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssub_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssub_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vssub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vssub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: addi a0, a1, -128 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a0 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vssub_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a2 +; CHECK-NEXT: addi a1, a0, -128 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vssub_vi_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. + +define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vssub_vi_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vssub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vssub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vssub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vssub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vssub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vssub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vssub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vssub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vssub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vssub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vssub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vssub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssub.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB108_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssub.vx v16, v16, a2, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vssub_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB109_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB109_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v24 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v16, v16, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB109_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB109_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a2 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v16, v16, a2 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +; FIXME: We don't match vssub.vi on RV32. + +define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vssub_vx_v32i64_evl12: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssub.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v32i64_evl12: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssub.vx v16, v16, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + ret <32 x i64> %v +} + +define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vssub_vx_v32i64_evl27: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssub.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_v32i64_evl27: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssub.vx v16, v16, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll new file mode 100644 index 0000000..6ea9758 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -0,0 +1,1740 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) + +define <8 x i7> @vssubu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i7> @llvm.vp.usub.sat.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl) + ret <8 x i7> %v +} + +declare <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) + +define <2 x i8> @vssubu_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssubu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssubu_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssubu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssubu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +define <2 x i8> @vssubu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) + +define <4 x i8> @vssubu_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +define <4 x i8> @vssubu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i8> %v +} + +declare <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8>, <5 x i8>, <5 x i1>, i32) + +define <5 x i8> @vssubu_vv_v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssubu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssubu_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssubu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssubu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v5i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +define <5 x i8> @vssubu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v5i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer + %head = insertelement <5 x i1> poison, i1 true, i32 0 + %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer + %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + ret <5 x i8> %v +} + +declare <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) + +define <8 x i8> @vssubu_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssubu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssubu_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssubu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssubu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +define <8 x i8> @vssubu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) + +define <16 x i8> @vssubu_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssubu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssubu_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssubu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssubu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +define <16 x i8> @vssubu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i8> %v +} + +declare <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) + +define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v258i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: addi a0, a1, -128 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a0 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB32_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB32_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +define <256 x i8> @vssubu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v258i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB33_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB33_2: +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a2 +; CHECK-NEXT: addi a1, a0, -128 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %head = insertelement <256 x i1> poison, i1 true, i32 0 + %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + ret <256 x i8> %v +} + +; Test splitting when the %evl is a known constant. + +define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vssubu_vi_v258i8_evl129: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + ret <256 x i8> %v +} + +; FIXME: The upper half is doing nothing. + +define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { +; CHECK-LABEL: vssubu_vi_v258i8_evl128: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + ret <256 x i8> %v +} + +declare <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) + +define <2 x i16> @vssubu_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssubu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssubu_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssubu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssubu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +define <2 x i16> @vssubu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) + +define <4 x i16> @vssubu_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssubu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssubu_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssubu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssubu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +define <4 x i16> @vssubu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) + +define <8 x i16> @vssubu_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssubu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssubu_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssubu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssubu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +define <8 x i16> @vssubu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) + +define <16 x i16> @vssubu_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssubu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssubu_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssubu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssubu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +define <16 x i16> @vssubu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) + +define <2 x i32> @vssubu_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssubu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssubu_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssubu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssubu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +define <2 x i32> @vssubu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) + +define <4 x i32> @vssubu_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssubu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssubu_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssubu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssubu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +define <4 x i32> @vssubu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) + +define <8 x i32> @vssubu_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssubu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssubu_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssubu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssubu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +define <8 x i32> @vssubu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) + +define <16 x i32> @vssubu_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssubu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssubu_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssubu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssubu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +define <16 x i32> @vssubu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) + +define <2 x i64> @vssubu_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssubu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssubu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssubu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +define <2 x i64> @vssubu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %head = insertelement <2 x i1> poison, i1 true, i32 0 + %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) + +define <4 x i64> @vssubu_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssubu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssubu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssubu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +define <4 x i64> @vssubu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %head = insertelement <4 x i1> poison, i1 true, i32 0 + %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) + +define <8 x i64> @vssubu_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssubu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssubu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssubu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +define <8 x i64> @vssubu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %head = insertelement <8 x i1> poison, i1 true, i32 0 + %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32) + +define <16 x i64> @vssubu_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssubu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssubu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v16i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v16i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v16i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v16i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssubu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +define <16 x i64> @vssubu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_v16i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer + %head = insertelement <16 x i1> poison, i1 true, i32 0 + %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + ret <16 x i64> %v +} + +; Test that split-legalization works as expected. + +declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) + +define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_v32i64: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB108_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB108_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v32i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB108_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB108_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a2, v0.t +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vi_v32i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB109_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 +; RV32-NEXT: .LBB109_2: +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v24 +; RV32-NEXT: addi a1, a0, -16 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v16, v16, v24 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vi_v32i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: li a2, 16 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB109_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 +; RV64-NEXT: .LBB109_2: +; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a2 +; RV64-NEXT: addi a1, a0, -16 +; RV64-NEXT: sltu a0, a0, a1 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v16, v16, a2 +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %head = insertelement <32 x i1> poison, i1 true, i32 0 + %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + ret <32 x i64> %v +} + +; FIXME: We don't match vssubu.vi on RV32. + +define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vssubu_vx_v32i64_evl12: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v32i64_evl12: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetivli zero, 0, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + ret <32 x i64> %v +} + +define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { +; RV32-LABEL: vssubu_vx_v32i64_evl27: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v1, v0, 2 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t +; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vssubu.vv v16, v16, v24, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_v32i64_evl27: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a0, -1 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + ret <32 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index 574c265..a084b53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -385,12 +385,12 @@ define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind { define <2 x i32> @vwsubu_v2i32_v2i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v2i32_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vzext.vf2 v11, v9 -; CHECK-NEXT: vwsubu.vv v8, v10, v11 +; CHECK-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v8, v10 ; CHECK-NEXT: ret %a = load <2 x i8>, ptr %x %b = load <2 x i8>, ptr %y @@ -899,12 +899,12 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v2i32_of_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vzext.vf2 v11, v9 -; CHECK-NEXT: vwsubu.vv v8, v10, v11 +; CHECK-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vsext.vf2 v8, v10 ; CHECK-NEXT: ret %a = load <2 x i8>, ptr %x %b = load <2 x i8>, ptr %y @@ -917,12 +917,12 @@ define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) { define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v2i64_of_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) -; CHECK-NEXT: vzext.vf4 v10, v8 -; CHECK-NEXT: vzext.vf4 v11, v9 -; CHECK-NEXT: vwsubu.vv v8, v10, v11 +; CHECK-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsext.vf4 v8, v10 ; CHECK-NEXT: ret %a = load <2 x i8>, ptr %x %b = load <2 x i8>, ptr %y @@ -935,12 +935,12 @@ define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) { define <2 x i64> @vwsubu_v2i64_of_v2i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v2i64_of_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) -; CHECK-NEXT: vzext.vf2 v10, v8 -; CHECK-NEXT: vzext.vf2 v11, v9 -; CHECK-NEXT: vwsubu.vv v8, v10, v11 +; CHECK-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-NEXT: vsext.vf2 v8, v10 ; CHECK-NEXT: ret %a = load <2 x i16>, ptr %x %b = load <2 x i16>, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll new file mode 100644 index 0000000..caaeae5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -0,0 +1,2015 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i7> @vsadd_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0 + %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i7> @llvm.vp.sadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl) + ret <vscale x 8 x i7> %v +} + +declare <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i8> @vsadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsadd_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i8> @vsadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsadd_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsadd_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i8> @vsadd_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsadd_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsadd_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsadd_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsadd_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsadd_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.sadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i8> @vsadd_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsadd_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsadd_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsadd_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i8> @vsadd_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsadd_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsadd_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsadd_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i8> @vsadd_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsadd_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsadd_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsadd_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i8> @vsadd_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsadd_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsadd_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsadd_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i8> @vsadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsadd_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsadd_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsadd_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32) + +define <vscale x 128 x i8> @vsadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub a2, a1, a0 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a1, a0, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +define <vscale x 128 x i8> @vsadd_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1 +; CHECK-NEXT: bltu a0, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.sadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i16> @vsadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsadd_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsadd_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsadd_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i16> @vsadd_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsadd_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsadd_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsadd_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +declare <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i16> @vsadd_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsadd_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsadd_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsadd_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +declare <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i16> @vsadd_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsadd_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsadd_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsadd_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +declare <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i16> @vsadd_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsadd_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsadd_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsadd_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +declare <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i16> @vsadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsadd_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsadd_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsadd_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +declare <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i32> @vsadd_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsadd_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsadd_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsadd_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +declare <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i32> @vsadd_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsadd_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsadd_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsadd_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +declare <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i32> @vsadd_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsadd_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsadd_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsadd_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +declare <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i32> @vsadd_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsadd_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsadd_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsadd_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +declare <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i32> @vsadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsadd_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsadd_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsadd_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +; Test that split-legalization works then the mask needs manual splitting. + +declare <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i32> @vsadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB118_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +define <vscale x 32 x i32> @vsadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vi v16, v16, -1 +; CHECK-NEXT: bltu a0, a1, .LBB119_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB119_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.sadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +declare <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i64> @vsadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsadd_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsadd_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +declare <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i64> @vsadd_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsadd_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsadd_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.sadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +declare <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i64> @vsadd_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsadd_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsadd_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.sadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +declare <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i64> @vsadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsadd_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsadd_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsadd_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsadd_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsadd_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsadd.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.sadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll new file mode 100644 index 0000000..c0779e5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -0,0 +1,2014 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i7> @vsaddu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t +; CHECK-NEXT: vminu.vx v8, v8, a2, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0 + %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i7> @llvm.vp.uadd.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl) + ret <vscale x 8 x i7> %v +} + +declare <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i8> @vsaddu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vsaddu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.uadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i8> @vsaddu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsaddu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsaddu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsaddu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsaddu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vsaddu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.uadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i8> @vsaddu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsaddu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsaddu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsaddu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsaddu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vsaddu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.uadd.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i8> @vsaddu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsaddu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsaddu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsaddu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsaddu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vsaddu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.uadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i8> @vsaddu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsaddu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsaddu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsaddu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsaddu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vsaddu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.uadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i8> @vsaddu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsaddu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsaddu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsaddu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsaddu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vsaddu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.uadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i8> @vsaddu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsaddu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsaddu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsaddu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsaddu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vsaddu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.uadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i8> @vsaddu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsaddu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsaddu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsaddu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsaddu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vsaddu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.uadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32) + +define <vscale x 128 x i8> @vsaddu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub a2, a1, a0 +; CHECK-NEXT: sltu a3, a1, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a1, a0, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +define <vscale x 128 x i8> @vsaddu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1 +; CHECK-NEXT: bltu a0, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.uadd.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i16> @vsaddu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsaddu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsaddu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsaddu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsaddu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vsaddu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.uadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i16> @vsaddu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsaddu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsaddu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsaddu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsaddu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vsaddu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.uadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +declare <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i16> @vsaddu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsaddu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsaddu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsaddu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsaddu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vsaddu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.uadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +declare <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i16> @vsaddu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsaddu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsaddu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsaddu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsaddu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vsaddu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.uadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +declare <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i16> @vsaddu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsaddu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsaddu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsaddu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsaddu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vsaddu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.uadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +declare <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i16> @vsaddu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsaddu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsaddu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsaddu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsaddu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vsaddu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.uadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +declare <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i32> @vsaddu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsaddu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsaddu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsaddu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsaddu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vsaddu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.uadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +declare <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i32> @vsaddu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsaddu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsaddu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsaddu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsaddu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vsaddu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.uadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +declare <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i32> @vsaddu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsaddu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsaddu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsaddu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsaddu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vsaddu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.uadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +declare <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i32> @vsaddu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsaddu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsaddu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsaddu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsaddu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vsaddu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.uadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +declare <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i32> @vsaddu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsaddu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsaddu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsaddu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsaddu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vsaddu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.uadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +; Test that split-legalization works then the mask needs manual splitting. + +declare <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i32> @vsaddu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB118_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +define <vscale x 32 x i32> @vsaddu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v16, v16, -1 +; CHECK-NEXT: bltu a0, a1, .LBB119_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB119_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.uadd.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +declare <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i64> @vsaddu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsaddu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsaddu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsaddu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsaddu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vsaddu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.uadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +declare <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i64> @vsaddu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsaddu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsaddu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsaddu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsaddu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vsaddu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.uadd.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +declare <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i64> @vsaddu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsaddu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsaddu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsaddu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsaddu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vsaddu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.uadd.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +declare <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i64> @vsaddu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsaddu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsaddu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsaddu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vsaddu_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsaddu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vsaddu_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsaddu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vsaddu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vsaddu_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vi v8, v8, -1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.uadd.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll new file mode 100644 index 0000000..2d51a2e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -0,0 +1,2067 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i7> @vssub_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v8 +; CHECK-NEXT: vsra.vi v8, v8, 1 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t +; CHECK-NEXT: li a0, 192 +; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0 + %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i7> @llvm.vp.ssub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl) + ret <vscale x 8 x i7> %v +} + +declare <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i8> @vssub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssub_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.ssub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i8> @vssub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssub_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssub_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssub_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.ssub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i8> @vssub_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssub_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssub_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssub_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssub_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssub_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.ssub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i8> @vssub_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssub_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssub_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssub_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssub_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssub_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.ssub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i8> @vssub_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssub_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssub_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssub_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssub_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssub_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.ssub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i8> @vssub_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssub_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssub_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssub_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssub_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssub_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.ssub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i8> @vssub_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssub_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssub_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssub_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssub_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssub_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.ssub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i8> @vssub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssub_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssub_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssub_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssub_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssub_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.ssub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32) + +define <vscale x 128 x i8> @vssub_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub a0, a1, a2 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a0 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +define <vscale x 128 x i8> @vssub_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a2 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a2 +; CHECK-NEXT: bltu a0, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.ssub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i16> @vssub_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssub_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssub_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssub_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssub_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssub_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.ssub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i16> @vssub_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssub_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssub_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssub_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssub_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssub_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.ssub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +declare <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i16> @vssub_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssub_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssub_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssub_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssub_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssub_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.ssub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +declare <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i16> @vssub_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssub_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssub_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssub_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssub_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssub_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.ssub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +declare <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i16> @vssub_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssub_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssub_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssub_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssub_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssub_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.ssub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +declare <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i16> @vssub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssub_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssub_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssub_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssub_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssub_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.ssub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +declare <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i32> @vssub_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssub_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssub_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssub_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssub_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssub_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.ssub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +declare <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i32> @vssub_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssub_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssub_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssub_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssub_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssub_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.ssub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +declare <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i32> @vssub_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssub_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssub_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssub_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssub_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssub_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.ssub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +declare <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i32> @vssub_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssub_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssub_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssub_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssub_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssub_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.ssub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +declare <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i32> @vssub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssub_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssub_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssub_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssub_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssub_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.ssub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +; Test that split-legalization works then the mask needs manual splitting. + +declare <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i32> @vssub_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a1 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a1, v0.t +; CHECK-NEXT: bltu a0, a2, .LBB118_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +define <vscale x 32 x i32> @vssub_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a2 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v16, v16, a2 +; CHECK-NEXT: bltu a0, a1, .LBB119_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB119_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.ssub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +declare <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i64> @vssub_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssub_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssub_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssub_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.ssub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +declare <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i64> @vssub_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssub_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssub_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssub_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.ssub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +declare <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i64> @vssub_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssub_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssub_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssub_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.ssub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +declare <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i64> @vssub_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssub_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssub_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssub_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssub_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssub_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssub_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssub.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll new file mode 100644 index 0000000..e5589ce --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -0,0 +1,2065 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i7> @vssubu_vx_nxv8i7(<vscale x 8 x i7> %a, i7 signext %b, <vscale x 8 x i1> %mask, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i7: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 127 +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vand.vx v9, v9, a2 +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i7> poison, i7 %b, i32 0 + %vb = shufflevector <vscale x 8 x i7> %elt.head, <vscale x 8 x i7> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i7> @llvm.vp.usub.sat.nxv8i7(<vscale x 8 x i7> %a, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %mask, i32 %evl) + ret <vscale x 8 x i7> %v +} + +declare <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i8> @vssubu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i8_commute: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v9, v8, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +define <vscale x 1 x i8> @vssubu_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i8> %v +} + +declare <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i8> @vssubu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssubu_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssubu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssubu_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssubu_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +define <vscale x 2 x i8> @vssubu_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i8> @llvm.vp.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i8> %v +} + +declare <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, <vscale x 3 x i1>, i32) + +define <vscale x 3 x i8> @vssubu_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssubu_vv_nxv3i8_unmasked(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssubu_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssubu_vx_nxv3i8_unmasked(<vscale x 3 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssubu_vi_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +define <vscale x 3 x i8> @vssubu_vi_nxv3i8_unmasked(<vscale x 3 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv3i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 3 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer + %head = insertelement <vscale x 3 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 3 x i1> %head, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer + %v = call <vscale x 3 x i8> @llvm.vp.usub.sat.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 %evl) + ret <vscale x 3 x i8> %v +} + +declare <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i8> @vssubu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssubu_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssubu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssubu_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssubu_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +define <vscale x 4 x i8> @vssubu_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i8> %v +} + +declare <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i8> @vssubu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssubu_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssubu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssubu_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssubu_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +define <vscale x 8 x i8> @vssubu_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i8> @llvm.vp.usub.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i8> %v +} + +declare <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i8> @vssubu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssubu_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssubu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssubu_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssubu_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +define <vscale x 16 x i8> @vssubu_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i8> @llvm.vp.usub.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i8> %v +} + +declare <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i8> @vssubu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssubu_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssubu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssubu_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssubu_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +define <vscale x 32 x i8> @vssubu_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i8> @llvm.vp.usub.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i8> %v +} + +declare <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) + +define <vscale x 64 x i8> @vssubu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssubu_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssubu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssubu_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssubu_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +define <vscale x 64 x i8> @vssubu_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 64 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer + %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer + %v = call <vscale x 64 x i8> @llvm.vp.usub.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl) + ret <vscale x 64 x i8> %v +} + +; Test that split-legalization works when the mask itself needs splitting. + +declare <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, <vscale x 128 x i1>, i32) + +define <vscale x 128 x i8> @vssubu_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub a0, a1, a2 +; CHECK-NEXT: sltu a3, a1, a0 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a0 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB50_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB50_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +define <vscale x 128 x i8> @vssubu_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv128i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a2 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a2 +; CHECK-NEXT: bltu a0, a1, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0 + %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer + %head = insertelement <vscale x 128 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> poison, <vscale x 128 x i32> zeroinitializer + %v = call <vscale x 128 x i8> @llvm.vp.usub.sat.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 %evl) + ret <vscale x 128 x i8> %v +} + +declare <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i16> @vssubu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssubu_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssubu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssubu_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssubu_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +define <vscale x 1 x i16> @vssubu_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i16> @llvm.vp.usub.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i16> %v +} + +declare <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i16> @vssubu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssubu_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssubu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssubu_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssubu_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +define <vscale x 2 x i16> @vssubu_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i16> @llvm.vp.usub.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i16> %v +} + +declare <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i16> @vssubu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssubu_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssubu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssubu_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssubu_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +define <vscale x 4 x i16> @vssubu_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i16> @llvm.vp.usub.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i16> %v +} + +declare <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i16> @vssubu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssubu_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssubu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssubu_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssubu_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +define <vscale x 8 x i16> @vssubu_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i16> @llvm.vp.usub.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i16> %v +} + +declare <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i16> @vssubu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssubu_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssubu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssubu_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssubu_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +define <vscale x 16 x i16> @vssubu_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i16> @llvm.vp.usub.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i16> %v +} + +declare <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i16> @vssubu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssubu_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssubu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssubu_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssubu_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +define <vscale x 32 x i16> @vssubu_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i16> poison, i16 -1, i32 0 + %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i16> @llvm.vp.usub.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i16> %v +} + +declare <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i32> @vssubu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssubu_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssubu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssubu_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssubu_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +define <vscale x 1 x i32> @vssubu_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i32> @llvm.vp.usub.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i32> %v +} + +declare <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i32> @vssubu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssubu_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssubu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssubu_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssubu_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +define <vscale x 2 x i32> @vssubu_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i32> @llvm.vp.usub.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i32> %v +} + +declare <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i32> @vssubu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssubu_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssubu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssubu_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssubu_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +define <vscale x 4 x i32> @vssubu_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i32> @llvm.vp.usub.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i32> %v +} + +declare <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i32> @vssubu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssubu_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssubu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssubu_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssubu_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +define <vscale x 8 x i32> @vssubu_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i32> @llvm.vp.usub.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i32> %v +} + +declare <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) + +define <vscale x 16 x i32> @vssubu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssubu_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssubu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssubu_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssubu_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +define <vscale x 16 x i32> @vssubu_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 16 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer + %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %v = call <vscale x 16 x i32> @llvm.vp.usub.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl) + ret <vscale x 16 x i32> %v +} + +; Test that split-legalization works then the mask needs manual splitting. + +declare <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, <vscale x 32 x i1>, i32) + +define <vscale x 32 x i32> @vssubu_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub a1, a0, a2 +; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a1 +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a1, v0.t +; CHECK-NEXT: bltu a0, a2, .LBB118_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: .LBB118_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +define <vscale x 32 x i32> @vssubu_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv32i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a2 +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v16, v16, a2 +; CHECK-NEXT: bltu a0, a1, .LBB119_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB119_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a2 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 + %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer + %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer + %v = call <vscale x 32 x i32> @llvm.vp.usub.sat.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 %evl) + ret <vscale x 32 x i32> %v +} + +declare <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) + +define <vscale x 1 x i64> @vssubu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssubu_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssubu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v9, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssubu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssubu_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +define <vscale x 1 x i64> @vssubu_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 1 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer + %v = call <vscale x 1 x i64> @llvm.vp.usub.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl) + ret <vscale x 1 x i64> %v +} + +declare <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) + +define <vscale x 2 x i64> @vssubu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssubu_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssubu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v10, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssubu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssubu_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +define <vscale x 2 x i64> @vssubu_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer + %v = call <vscale x 2 x i64> @llvm.vp.usub.sat.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl) + ret <vscale x 2 x i64> %v +} + +declare <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) + +define <vscale x 4 x i64> @vssubu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssubu_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssubu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v12, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssubu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssubu_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +define <vscale x 4 x i64> @vssubu_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 4 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer + %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer + %v = call <vscale x 4 x i64> @llvm.vp.usub.sat.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl) + ret <vscale x 4 x i64> %v +} + +declare <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) + +define <vscale x 8 x i64> @vssubu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16, v0.t +; CHECK-NEXT: ret + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssubu_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssubu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v16, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0, v0.t +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssubu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) { +; RV32-LABEL: vssubu_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vssubu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vssubu_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssubu_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} + +define <vscale x 8 x i64> @vssubu_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { +; CHECK-LABEL: vssubu_vi_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vssubu.vx v8, v8, a1 +; CHECK-NEXT: ret + %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0 + %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer + %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0 + %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer + %v = call <vscale x 8 x i64> @llvm.vp.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl) + ret <vscale x 8 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll index e01984b..e07e520 100644 --- a/llvm/test/CodeGen/RISCV/select.ll +++ b/llvm/test/CodeGen/RISCV/select.ll @@ -1606,23 +1606,255 @@ define i32 @select_cst_unknown(i32 signext %a, i32 signext %b) { ; RV64IMXVTCONDOPS-LABEL: select_cst_unknown: ; RV64IMXVTCONDOPS: # %bb.0: ; RV64IMXVTCONDOPS-NEXT: slt a0, a0, a1 -; RV64IMXVTCONDOPS-NEXT: li a1, -7 -; RV64IMXVTCONDOPS-NEXT: vt.maskcn a1, a1, a0 -; RV64IMXVTCONDOPS-NEXT: li a2, 5 -; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a2, a0 -; RV64IMXVTCONDOPS-NEXT: or a0, a0, a1 +; RV64IMXVTCONDOPS-NEXT: li a1, -12 +; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 5 ; RV64IMXVTCONDOPS-NEXT: ret ; ; CHECKZICOND-LABEL: select_cst_unknown: ; CHECKZICOND: # %bb.0: ; CHECKZICOND-NEXT: slt a0, a0, a1 -; CHECKZICOND-NEXT: li a1, -7 -; CHECKZICOND-NEXT: czero.nez a1, a1, a0 -; CHECKZICOND-NEXT: li a2, 5 -; CHECKZICOND-NEXT: czero.eqz a0, a2, a0 -; CHECKZICOND-NEXT: or a0, a0, a1 +; CHECKZICOND-NEXT: li a1, -12 +; CHECKZICOND-NEXT: czero.nez a0, a1, a0 +; CHECKZICOND-NEXT: addi a0, a0, 5 ; CHECKZICOND-NEXT: ret %cond = icmp slt i32 %a, %b %ret = select i1 %cond, i32 5, i32 -7 ret i32 %ret } + +define i32 @select_cst1(i1 zeroext %cond) { +; RV32IM-LABEL: select_cst1: +; RV32IM: # %bb.0: +; RV32IM-NEXT: mv a1, a0 +; RV32IM-NEXT: li a0, 10 +; RV32IM-NEXT: bnez a1, .LBB43_2 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: li a0, 20 +; RV32IM-NEXT: .LBB43_2: +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_cst1: +; RV64IM: # %bb.0: +; RV64IM-NEXT: mv a1, a0 +; RV64IM-NEXT: li a0, 10 +; RV64IM-NEXT: bnez a1, .LBB43_2 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: li a0, 20 +; RV64IM-NEXT: .LBB43_2: +; RV64IM-NEXT: ret +; +; RV64IMXVTCONDOPS-LABEL: select_cst1: +; RV64IMXVTCONDOPS: # %bb.0: +; RV64IMXVTCONDOPS-NEXT: li a1, 10 +; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10 +; RV64IMXVTCONDOPS-NEXT: ret +; +; CHECKZICOND-LABEL: select_cst1: +; CHECKZICOND: # %bb.0: +; CHECKZICOND-NEXT: li a1, 10 +; CHECKZICOND-NEXT: czero.nez a0, a1, a0 +; CHECKZICOND-NEXT: addi a0, a0, 10 +; CHECKZICOND-NEXT: ret + %ret = select i1 %cond, i32 10, i32 20 + ret i32 %ret +} + +define i32 @select_cst2(i1 zeroext %cond) { +; RV32IM-LABEL: select_cst2: +; RV32IM: # %bb.0: +; RV32IM-NEXT: mv a1, a0 +; RV32IM-NEXT: li a0, 10 +; RV32IM-NEXT: bnez a1, .LBB44_2 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: lui a0, 5 +; RV32IM-NEXT: addi a0, a0, -480 +; RV32IM-NEXT: .LBB44_2: +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_cst2: +; RV64IM: # %bb.0: +; RV64IM-NEXT: mv a1, a0 +; RV64IM-NEXT: li a0, 10 +; RV64IM-NEXT: bnez a1, .LBB44_2 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: lui a0, 5 +; RV64IM-NEXT: addiw a0, a0, -480 +; RV64IM-NEXT: .LBB44_2: +; RV64IM-NEXT: ret +; +; RV64IMXVTCONDOPS-LABEL: select_cst2: +; RV64IMXVTCONDOPS: # %bb.0: +; RV64IMXVTCONDOPS-NEXT: lui a1, 5 +; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -490 +; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 10 +; RV64IMXVTCONDOPS-NEXT: ret +; +; RV32IMZICOND-LABEL: select_cst2: +; RV32IMZICOND: # %bb.0: +; RV32IMZICOND-NEXT: lui a1, 5 +; RV32IMZICOND-NEXT: addi a1, a1, -490 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: addi a0, a0, 10 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_cst2: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: lui a1, 5 +; RV64IMZICOND-NEXT: addiw a1, a1, -490 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: addi a0, a0, 10 +; RV64IMZICOND-NEXT: ret + %ret = select i1 %cond, i32 10, i32 20000 + ret i32 %ret +} + +define i32 @select_cst3(i1 zeroext %cond) { +; RV32IM-LABEL: select_cst3: +; RV32IM: # %bb.0: +; RV32IM-NEXT: bnez a0, .LBB45_2 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: lui a0, 5 +; RV32IM-NEXT: addi a0, a0, -480 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB45_2: +; RV32IM-NEXT: lui a0, 7 +; RV32IM-NEXT: addi a0, a0, 1328 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_cst3: +; RV64IM: # %bb.0: +; RV64IM-NEXT: bnez a0, .LBB45_2 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: lui a0, 5 +; RV64IM-NEXT: addiw a0, a0, -480 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB45_2: +; RV64IM-NEXT: lui a0, 7 +; RV64IM-NEXT: addiw a0, a0, 1328 +; RV64IM-NEXT: ret +; +; RV64IMXVTCONDOPS-LABEL: select_cst3: +; RV64IMXVTCONDOPS: # %bb.0: +; RV64IMXVTCONDOPS-NEXT: lui a1, 1048574 +; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, -1808 +; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: lui a1, 7 +; RV64IMXVTCONDOPS-NEXT: addiw a1, a1, 1328 +; RV64IMXVTCONDOPS-NEXT: add a0, a0, a1 +; RV64IMXVTCONDOPS-NEXT: ret +; +; RV32IMZICOND-LABEL: select_cst3: +; RV32IMZICOND: # %bb.0: +; RV32IMZICOND-NEXT: lui a1, 1048574 +; RV32IMZICOND-NEXT: addi a1, a1, -1808 +; RV32IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV32IMZICOND-NEXT: lui a1, 7 +; RV32IMZICOND-NEXT: addi a1, a1, 1328 +; RV32IMZICOND-NEXT: add a0, a0, a1 +; RV32IMZICOND-NEXT: ret +; +; RV64IMZICOND-LABEL: select_cst3: +; RV64IMZICOND: # %bb.0: +; RV64IMZICOND-NEXT: lui a1, 1048574 +; RV64IMZICOND-NEXT: addiw a1, a1, -1808 +; RV64IMZICOND-NEXT: czero.nez a0, a1, a0 +; RV64IMZICOND-NEXT: lui a1, 7 +; RV64IMZICOND-NEXT: addiw a1, a1, 1328 +; RV64IMZICOND-NEXT: add a0, a0, a1 +; RV64IMZICOND-NEXT: ret + %ret = select i1 %cond, i32 30000, i32 20000 + ret i32 %ret +} + +define i32 @select_cst4(i1 zeroext %cond) { +; CHECK-LABEL: select_cst4: +; CHECK: # %bb.0: +; CHECK-NEXT: neg a0, a0 +; CHECK-NEXT: xori a0, a0, 2047 +; CHECK-NEXT: ret + %ret = select i1 %cond, i32 -2048, i32 2047 + ret i32 %ret +} + +define i32 @select_cst5(i1 zeroext %cond) { +; RV32IM-LABEL: select_cst5: +; RV32IM: # %bb.0: +; RV32IM-NEXT: mv a1, a0 +; RV32IM-NEXT: li a0, 2047 +; RV32IM-NEXT: bnez a1, .LBB47_2 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a0, a0, -2047 +; RV32IM-NEXT: .LBB47_2: +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_cst5: +; RV64IM: # %bb.0: +; RV64IM-NEXT: mv a1, a0 +; RV64IM-NEXT: li a0, 2047 +; RV64IM-NEXT: bnez a1, .LBB47_2 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: lui a0, 1 +; RV64IM-NEXT: addiw a0, a0, -2047 +; RV64IM-NEXT: .LBB47_2: +; RV64IM-NEXT: ret +; +; RV64IMXVTCONDOPS-LABEL: select_cst5: +; RV64IMXVTCONDOPS: # %bb.0: +; RV64IMXVTCONDOPS-NEXT: li a1, 2 +; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047 +; RV64IMXVTCONDOPS-NEXT: ret +; +; CHECKZICOND-LABEL: select_cst5: +; CHECKZICOND: # %bb.0: +; CHECKZICOND-NEXT: li a1, 2 +; CHECKZICOND-NEXT: czero.nez a0, a1, a0 +; CHECKZICOND-NEXT: addi a0, a0, 2047 +; CHECKZICOND-NEXT: ret + %ret = select i1 %cond, i32 2047, i32 2049 + ret i32 %ret +} + +define i32 @select_cst6(i1 zeroext %cond) { +; RV32IM-LABEL: select_cst6: +; RV32IM: # %bb.0: +; RV32IM-NEXT: bnez a0, .LBB48_2 +; RV32IM-NEXT: # %bb.1: +; RV32IM-NEXT: li a0, 2047 +; RV32IM-NEXT: ret +; RV32IM-NEXT: .LBB48_2: +; RV32IM-NEXT: lui a0, 1 +; RV32IM-NEXT: addi a0, a0, -2047 +; RV32IM-NEXT: ret +; +; RV64IM-LABEL: select_cst6: +; RV64IM: # %bb.0: +; RV64IM-NEXT: bnez a0, .LBB48_2 +; RV64IM-NEXT: # %bb.1: +; RV64IM-NEXT: li a0, 2047 +; RV64IM-NEXT: ret +; RV64IM-NEXT: .LBB48_2: +; RV64IM-NEXT: lui a0, 1 +; RV64IM-NEXT: addiw a0, a0, -2047 +; RV64IM-NEXT: ret +; +; RV64IMXVTCONDOPS-LABEL: select_cst6: +; RV64IMXVTCONDOPS: # %bb.0: +; RV64IMXVTCONDOPS-NEXT: li a1, 2 +; RV64IMXVTCONDOPS-NEXT: vt.maskc a0, a1, a0 +; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047 +; RV64IMXVTCONDOPS-NEXT: ret +; +; CHECKZICOND-LABEL: select_cst6: +; CHECKZICOND: # %bb.0: +; CHECKZICOND-NEXT: li a1, 2 +; CHECKZICOND-NEXT: czero.eqz a0, a1, a0 +; CHECKZICOND-NEXT: addi a0, a0, 2047 +; CHECKZICOND-NEXT: ret + %ret = select i1 %cond, i32 2049, i32 2047 + ret i32 %ret +} diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index a2a953c..87f2a63 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -871,3 +871,64 @@ define void @zext_nneg_dominating_icmp_i32_zeroext(i16 signext %0) { 5: ret void } + +; The load is used extended and non-extended in the successor basic block. The +; signed compare will cause the non-extended value to exported out of the first +; basic block using a sext to XLen. We need to CSE the zext nneg with the sext +; so that we can form a sextload. +define void @load_zext_nneg_sext_cse(ptr %p) nounwind { +; RV32I-LABEL: load_zext_nneg_sext_cse: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: lh s0, 0(a0) +; RV32I-NEXT: bltz s0, .LBB50_2 +; RV32I-NEXT: # %bb.1: # %bb1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call bar_i16 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail bar_i32 +; RV32I-NEXT: .LBB50_2: # %bb2 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64-LABEL: load_zext_nneg_sext_cse: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: lh s0, 0(a0) +; RV64-NEXT: bltz s0, .LBB50_2 +; RV64-NEXT: # %bb.1: # %bb1 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: call bar_i16 +; RV64-NEXT: mv a0, s0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: tail bar_i32 +; RV64-NEXT: .LBB50_2: # %bb2 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %load = load i16, ptr %p + %zext = zext nneg i16 %load to i32 + %cmp = icmp sgt i16 %load, -1 + br i1 %cmp, label %bb1, label %bb2 + +bb1: + tail call void @bar_i16(i16 signext %load) + tail call void @bar_i32(i32 signext %zext) + br label %bb2 + +bb2: + ret void +} +declare void @bar_i16(i16); diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll index fc35bc4..8d065da 100644 --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I -; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I ; Check that memory accesses to array elements with large offsets have those diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index ec6e978..7fc4713 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IM %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s -; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64IM %s define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index eea8e64..540883fdc 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s -; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=CHECK,RV32IM %s -; RUN: llc -mtriple=riscv64 -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=CHECK,RV64I %s -; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs -riscv-enable-sink-fold < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=CHECK,RV64IM %s |