Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir | 1
-rw-r--r-- llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir | 12
-rw-r--r-- llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll | 112
-rw-r--r-- llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll | 5
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll | 20
-rw-r--r-- llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll | 11
-rw-r--r-- llvm/test/CodeGen/AArch64/cheap-as-a-move.ll | 30
-rw-r--r-- llvm/test/CodeGen/AArch64/extract-bits.ll | 5
-rw-r--r-- llvm/test/CodeGen/AArch64/hadd-combine.ll | 54
-rw-r--r-- llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll | 5
-rw-r--r-- llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll | 50
-rw-r--r-- llvm/test/CodeGen/AArch64/sadd_sat.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 299
-rw-r--r-- llvm/test/CodeGen/AArch64/sink-and-fold.ll | 4
-rw-r--r-- llvm/test/CodeGen/AArch64/sms-regpress.mir | 160
-rw-r--r-- llvm/test/CodeGen/AArch64/ssub_sat.ll | 2
-rw-r--r-- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 299
-rw-r--r-- llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll | 75
-rw-r--r-- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 295
-rw-r--r-- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 291
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll | 86
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/bf16.ll | 73
-rw-r--r-- llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll | 11
-rw-r--r-- llvm/test/CodeGen/AMDGPU/function-args.ll | 1
-rw-r--r-- llvm/test/CodeGen/AMDGPU/function-returns.ll | 87
-rw-r--r-- llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll | 582
-rw-r--r-- llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll | 28
-rw-r--r-- llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll | 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll | 4
-rw-r--r-- llvm/test/CodeGen/Generic/allow-check.ll | 1
-rw-r--r-- llvm/test/CodeGen/PowerPC/legalize-vaarg.ll | 17
-rw-r--r-- llvm/test/CodeGen/PowerPC/sms-regpress.mir | 186
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir | 902
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir | 534
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir | 900
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir | 900
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir | 1589
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir | 810
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir | 1589
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir | 694
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir | 817
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir | 116
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir | 88
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir | 1589
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir | 820
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir | 675
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir | 820
-rw-r--r-- llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir | 820
-rw-r--r-- llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll | 21
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll | 183
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll | 60
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll | 209
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 53
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll | 256
-rw-r--r-- llvm/test/CodeGen/SPARC/inlineasm-bad.ll | 9
-rw-r--r-- llvm/test/CodeGen/SPARC/inlineasm.ll | 9
-rw-r--r-- llvm/test/CodeGen/SPIRV/OpVariable_order.ll | 14
-rw-r--r-- llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll | 26
-rw-r--r-- llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll | 60
-rw-r--r-- llvm/test/CodeGen/WebAssembly/multi-return.ll | 72
-rw-r--r-- llvm/test/CodeGen/WebAssembly/simd-arith.ll | 13138
-rw-r--r-- llvm/test/CodeGen/WebAssembly/simd.ll | 408
-rw-r--r-- llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll | 11
-rw-r--r-- llvm/test/CodeGen/X86/AppendingLinkage.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/combine-pavg.ll | 9
-rw-r--r-- llvm/test/CodeGen/X86/evex-to-vex-compress.mir | 32
-rw-r--r-- llvm/test/CodeGen/X86/freeze-vector.ll | 6
-rw-r--r-- llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll | 220
-rw-r--r-- llvm/test/CodeGen/X86/load-local-v3i129.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/pr23664.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/vector-trunc-nowrap.ll | 2213
-rw-r--r-- llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll | 18
79 files changed, 24061 insertions, 9471 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index ac3c47c..200e9d1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -395,6 +395,7 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_SADDSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_USUBSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
index 499c08f..7921de6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
@@ -15,7 +15,7 @@
define void @mul_wrong_pow_2(ptr %addr) { ret void }
define void @more_than_one_use_shl_1(ptr %addr) { ret void }
define void @more_than_one_use_shl_2(ptr %addr) { ret void }
- define void @more_than_one_use_shl_lsl_fast(ptr %addr) #1 { ret void }
+ define void @more_than_one_use_shl_lsl_fast(ptr %addr) { ret void }
define void @more_than_one_use_shl_lsl_slow(ptr %addr) { ret void }
define void @more_than_one_use_shl_minsize(ptr %addr) #0 { ret void }
define void @ldrwrox(ptr %addr) { ret void }
@@ -24,7 +24,6 @@
define void @ldbbrox(ptr %addr) { ret void }
define void @ldrqrox(ptr %addr) { ret void }
attributes #0 = { optsize }
- attributes #1 = { "target-features"="+addr-lsl-fast" }
...
---
@@ -478,11 +477,10 @@ body: |
; CHECK: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
- ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3
- ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr)
- ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr)
- ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
+ ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
+ ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr)
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]]
; CHECK-NEXT: $x2 = COPY [[ADDXrr]]
; CHECK-NEXT: RET_ReallyLR implicit $x2
%0:gpr(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 59cd87f..022aaea 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK0
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-fast | FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3
%struct.a = type [256 x i16]
%struct.b = type [256 x i32]
@@ -49,36 +49,20 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
}
define i32 @word(ptr %ctx, i32 %xor72) nounwind {
-; CHECK0-LABEL: word:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-NEXT: ubfx x8, x1, #9, #8
-; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: mov x19, x0
-; CHECK0-NEXT: lsl x21, x8, #2
-; CHECK0-NEXT: ldr w20, [x0, x21]
-; CHECK0-NEXT: bl foo
-; CHECK0-NEXT: mov w0, w20
-; CHECK0-NEXT: str w20, [x19, x21]
-; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: word:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK3-NEXT: ubfx x21, x1, #9, #8
-; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK3-NEXT: mov x19, x0
-; CHECK3-NEXT: ldr w20, [x0, x21, lsl #2]
-; CHECK3-NEXT: bl foo
-; CHECK3-NEXT: mov w0, w20
-; CHECK3-NEXT: str w20, [x19, x21, lsl #2]
-; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK3-NEXT: ret
+; CHECK-LABEL: word:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ubfx x21, x1, #9, #8
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov w0, w20
+; CHECK-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -90,36 +74,20 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
}
define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
-; CHECK0-LABEL: doubleword:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-NEXT: ubfx x8, x1, #9, #8
-; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: mov x19, x0
-; CHECK0-NEXT: lsl x21, x8, #3
-; CHECK0-NEXT: ldr x20, [x0, x21]
-; CHECK0-NEXT: bl foo
-; CHECK0-NEXT: mov x0, x20
-; CHECK0-NEXT: str x20, [x19, x21]
-; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: doubleword:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK3-NEXT: ubfx x21, x1, #9, #8
-; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK3-NEXT: mov x19, x0
-; CHECK3-NEXT: ldr x20, [x0, x21, lsl #3]
-; CHECK3-NEXT: bl foo
-; CHECK3-NEXT: mov x0, x20
-; CHECK3-NEXT: str x20, [x19, x21, lsl #3]
-; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK3-NEXT: ret
+; CHECK-LABEL: doubleword:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ubfx x21, x1, #9, #8
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
@@ -163,20 +131,12 @@ endbb:
}
define i64 @gep3(ptr %p, i64 %b) {
-; CHECK0-LABEL: gep3:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: lsl x9, x1, #3
-; CHECK0-NEXT: mov x8, x0
-; CHECK0-NEXT: ldr x0, [x0, x9]
-; CHECK0-NEXT: str x1, [x8, x9]
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: gep3:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: mov x8, x0
-; CHECK3-NEXT: ldr x0, [x0, x1, lsl #3]
-; CHECK3-NEXT: str x1, [x8, x1, lsl #3]
-; CHECK3-NEXT: ret
+; CHECK-LABEL: gep3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: ldr x0, [x0, x1, lsl #3]
+; CHECK-NEXT: str x1, [x8, x1, lsl #3]
+; CHECK-NEXT: ret
%g = getelementptr inbounds i64, ptr %p, i64 %b
%l = load i64, ptr %g
store i64 %b, ptr %g
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index 573f921..e31c9a0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -134,9 +134,8 @@ define void @test8(i64 %a, ptr noalias %src, ptr noalias %dst, i64 %n) {
; CHECK-NEXT: b.hs .LBB7_1
; CHECK-NEXT: // %bb.3: // %if.then
; CHECK-NEXT: // in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: lsl x10, x8, #3
-; CHECK-NEXT: ldr x11, [x1, x10]
-; CHECK-NEXT: str x11, [x2, x10]
+; CHECK-NEXT: ldr x10, [x1, x8, lsl #3]
+; CHECK-NEXT: str x10, [x2, x8, lsl #3]
; CHECK-NEXT: b .LBB7_1
; CHECK-NEXT: .LBB7_4: // %exit
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index d593272..6bcd2f0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -125,7 +125,7 @@ return: ; preds = %if.end23, %if.then3
}
; CHECK: @test
-; CHECK-NOT: , uxtw #2]
+; CHECK: , uxtw #2]
define i32 @test(ptr %array, i8 zeroext %c, i32 %arg) {
entry:
%conv = zext i8 %c to i32
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 3542b26..5b055a4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -201,11 +201,10 @@ define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp {
; CHECK-LABEL: fct1_64x1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:globalArray64x1
-; CHECK-NEXT: lsl x9, x1, #3
; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset
@@ -238,11 +237,10 @@ define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp {
; CHECK-LABEL: fct1_32x2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:globalArray32x2
-; CHECK-NEXT: lsl x9, x1, #3
; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset
@@ -275,11 +273,10 @@ define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp {
; CHECK-LABEL: fct1_16x4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:globalArray16x4
-; CHECK-NEXT: lsl x9, x1, #3
; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset
@@ -312,11 +309,10 @@ define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp {
; CHECK-LABEL: fct1_8x8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:globalArray8x8
-; CHECK-NEXT: lsl x9, x1, #3
; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset
diff --git a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
index 8f19553..634d1b9 100644
--- a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
@@ -82,13 +82,12 @@ define void @avoid_promotion_2_and(ptr nocapture noundef %arg) {
; CHECK-NEXT: eor w10, w10, w11
; CHECK-NEXT: ldur w11, [x8, #-24]
; CHECK-NEXT: and w10, w10, w14
-; CHECK-NEXT: ldp x15, x14, [x8, #-16]
-; CHECK-NEXT: ubfiz x13, x10, #1, #32
+; CHECK-NEXT: ldp x14, x13, [x8, #-16]
; CHECK-NEXT: str w10, [x8]
-; CHECK-NEXT: and w10, w11, w12
-; CHECK-NEXT: ldrh w11, [x14, x13]
-; CHECK-NEXT: strh w11, [x15, w10, uxtw #1]
-; CHECK-NEXT: strh w12, [x14, x13]
+; CHECK-NEXT: and w11, w11, w12
+; CHECK-NEXT: ldrh w15, [x13, w10, uxtw #1]
+; CHECK-NEXT: strh w15, [x14, w11, uxtw #1]
+; CHECK-NEXT: strh w12, [x13, w10, uxtw #1]
; CHECK-NEXT: b LBB1_1
; CHECK-NEXT: LBB1_4: ; %exit
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
index b5c2104..50c70c5 100644
--- a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
+++ b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
@@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux"
define void @f0(ptr %a, i64 %n) {
; CHECK-LABEL: f0:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 48
@@ -15,7 +15,6 @@ define void @f0(ptr %a, i64 %n) {
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
; CHECK-NEXT: .cfi_offset w30, -48
; CHECK-NEXT: mov x21, #1 // =0x1
; CHECK-NEXT: mov x19, x1
@@ -27,18 +26,17 @@ define void @f0(ptr %a, i64 %n) {
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %loop.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x23, x22, #2
+; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
; CHECK-NEXT: mov x1, x21
-; CHECK-NEXT: ldr w0, [x20, x23]
; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x23]
+; CHECK-NEXT: str w0, [x20, x22, lsl #2]
; CHECK-NEXT: add x22, x22, #1
; CHECK-NEXT: cmp x22, x19
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %exit
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
br label %loop
@@ -64,15 +62,13 @@ exit:
define void @f1(ptr %a, i64 %n) {
; CHECK-LABEL: f1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: mov x19, x1
; CHECK-NEXT: mov x20, x0
; CHECK-NEXT: mov x21, xzr
@@ -80,19 +76,17 @@ define void @f1(ptr %a, i64 %n) {
; CHECK-NEXT: b.ge .LBB1_2
; CHECK-NEXT: .LBB1_1: // %loop.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x22, x21, #2
+; CHECK-NEXT: ldr w0, [x20, x21, lsl #2]
; CHECK-NEXT: mov x1, #1450704896 // =0x56780000
; CHECK-NEXT: movk x1, #4660, lsl #48
-; CHECK-NEXT: ldr w0, [x20, x22]
; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x22]
+; CHECK-NEXT: str w0, [x20, x21, lsl #2]
; CHECK-NEXT: add x21, x21, #1
; CHECK-NEXT: cmp x21, x19
; CHECK-NEXT: b.lt .LBB1_1
; CHECK-NEXT: .LBB1_2: // %exit
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
entry:
br label %loop
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index d4ea143..b87157a 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -972,10 +972,9 @@ define void @pr38938(ptr %a0, ptr %a1) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr x8, [x1]
; CHECK-NEXT: ubfx x8, x8, #21, #10
-; CHECK-NEXT: lsl x8, x8, #2
-; CHECK-NEXT: ldr w9, [x0, x8]
+; CHECK-NEXT: ldr w9, [x0, x8, lsl #2]
; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: str w9, [x0, x8]
+; CHECK-NEXT: str w9, [x0, x8, lsl #2]
; CHECK-NEXT: ret
%tmp = load i64, ptr %a1, align 8
%tmp1 = lshr i64 %tmp, 21
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index 491bf40..c0f7678 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -903,6 +903,58 @@ define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
+define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: shadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: srhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: uhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
+define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: urhadd_demandedelts:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
+ %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
+ %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %r0
+}
+
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -927,4 +979,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 30123a3..e8dafd5 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -223,10 +223,9 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) {
; CHECK-NEXT: // Parent Loop BB3_1 Depth=1
; CHECK-NEXT: // => This Loop Header: Depth=2
; CHECK-NEXT: // Child Loop BB3_3 Depth 3
-; CHECK-NEXT: lsl x12, x11, #3
+; CHECK-NEXT: ldr x13, [x1, x11, lsl #3]
+; CHECK-NEXT: ldr x12, [x10, x11, lsl #3]
; CHECK-NEXT: mov x14, x4
-; CHECK-NEXT: ldr x13, [x1, x12]
-; CHECK-NEXT: ldr x12, [x10, x12]
; CHECK-NEXT: ldr w13, [x13]
; CHECK-NEXT: .LBB3_3: // %for.body8
; CHECK-NEXT: // Parent Loop BB3_1 Depth=1
diff --git a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll
new file mode 100644
index 0000000..728cffe
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll
@@ -0,0 +1,50 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- ok.ll
+
+; RUN: llc -mtriple=aarch64-linux ok.ll -o - | \
+; RUN: FileCheck %s --check-prefix=ASM
+; RUN: llc -mtriple=aarch64-linux ok.ll -filetype=obj -o - | \
+; RUN: llvm-readelf --notes - | FileCheck %s --check-prefix=OBJ
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458}
+!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 85}
+
+; ASM: .section .note.gnu.property,"a",@note
+; ASM-NEXT: .p2align 3, 0x0
+; ASM-NEXT: .word 4
+; ASM-NEXT: .word 24
+; ASM-NEXT: .word 5
+; ASM-NEXT: .asciz "GNU"
+; 3221225473 = 0xc0000001 = GNU_PROPERTY_AARCH64_FEATURE_PAUTH
+; ASM-NEXT: .word 3221225473
+; ASM-NEXT: .word 16
+; ASM-NEXT: .xword 268435458
+; ASM-NEXT: .xword 85
+
+; OBJ: Displaying notes found in: .note.gnu.property
+; OBJ-NEXT: Owner Data size Description
+; OBJ-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note)
+; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)
+
+; ERR: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present
+
+;--- err1.ll
+
+; RUN: not llc -mtriple=aarch64-linux err1.ll 2>&1 -o - | \
+; RUN: FileCheck %s --check-prefix=ERR
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2}
+
+;--- err2.ll
+
+; RUN: not llc -mtriple=aarch64-linux err2.ll 2>&1 -o - | \
+; RUN: FileCheck %s --check-prefix=ERR
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31}
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll
index 9e09b7f..789fd7b 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll
@@ -2,8 +2,6 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for vec
-
declare i4 @llvm.sadd.sat.i4(i4, i4)
declare i8 @llvm.sadd.sat.i8(i8, i8)
declare i16 @llvm.sadd.sat.i16(i16, i16)
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 6f1ae02..8a0e766 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -2,28 +2,10 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for v16i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16
+; CHECK-GI: warning: Instruction selection used fallback path for v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>)
@@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
}
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
-; CHECK-LABEL: v32i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ret
%z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
}
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
-; CHECK-LABEL: v64i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v64i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v2.16b, v2.16b, v6.16b
+; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v5.16b
+; CHECK-SD-NEXT: sqadd v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v64i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT: sqadd v2.16b, v2.16b, v6.16b
+; CHECK-GI-NEXT: sqadd v3.16b, v3.16b, v7.16b
+; CHECK-GI-NEXT: ret
%z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
}
@@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
}
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
-; CHECK-LABEL: v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
%z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
}
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
-; CHECK-LABEL: v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h
-; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h
-; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v2.8h, v2.8h, v6.8h
+; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v4.8h
+; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: sqadd v3.8h, v3.8h, v7.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v5.8h
+; CHECK-GI-NEXT: sqadd v2.8h, v2.8h, v6.8h
+; CHECK-GI-NEXT: sqadd v3.8h, v3.8h, v7.8h
+; CHECK-GI-NEXT: ret
%z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
}
@@ -135,19 +145,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: shl v1.4h, v1.4h, #8
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: str s0, [x2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v0.b[3]
+; CHECK-GI-NEXT: mov b6, v1.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v1.b[2]
+; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
+; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
%x = load <4 x i8>, ptr %px
%y = load <4 x i8>, ptr %py
%z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -196,23 +229,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1 { v0.h }[0], [x0]
-; CHECK-NEXT: ld1 { v1.h }[0], [x1]
-; CHECK-NEXT: add x8, x0, #2
-; CHECK-NEXT: add x9, x1, #2
-; CHECK-NEXT: ld1 { v0.h }[2], [x8]
-; CHECK-NEXT: ld1 { v1.h }[2], [x9]
-; CHECK-NEXT: shl v1.2s, v1.2s, #16
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
-; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ushr v0.2s, v0.2s, #16
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [x2]
-; CHECK-NEXT: strh w8, [x2, #2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0]
+; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1]
+; CHECK-SD-NEXT: add x8, x0, #2
+; CHECK-SD-NEXT: add x9, x1, #2
+; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9]
+; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [x2]
+; CHECK-SD-NEXT: strh w8, [x2, #2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: ldr h2, [x1]
+; CHECK-GI-NEXT: ldr h3, [x1, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v3.h[0]
+; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: str h0, [x2]
+; CHECK-GI-NEXT: str h1, [x2, #2]
+; CHECK-GI-NEXT: ret
%x = load <2 x i16>, ptr %px
%y = load <2 x i16>, ptr %py
%z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
@@ -230,15 +277,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
}
define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v12i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: sqadd v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: sqadd v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: str q0, [x2]
-; CHECK-NEXT: str d1, [x2, #16]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v12i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldp q0, q3, [x1]
+; CHECK-SD-NEXT: ldp q1, q2, [x0]
+; CHECK-SD-NEXT: sqadd v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: sqadd v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: str d1, [x2, #16]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v12i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sqadd v1.4h, v2.4h, v3.4h
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: str d1, [x2, #16]
+; CHECK-GI-NEXT: ret
%x = load <12 x i16>, ptr %px
%y = load <12 x i16>, ptr %py
%z = call <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
@@ -346,23 +405,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
}
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
-; CHECK-LABEL: v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: sqadd v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: sqadd v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: sqadd v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: ret
%z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
}
@@ -377,23 +450,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
}
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
-; CHECK-LABEL: v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
}
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqadd v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: sqadd v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: sqadd v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: sqadd v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
%z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z
}
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 5200722..f65a08a 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -100,7 +100,7 @@ exit:
}
; Address calculation cheap enough on some cores.
-define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" {
+define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: f3:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tbz w0, #0, .LBB3_2
@@ -130,7 +130,7 @@ exit:
ret i32 %v
}
-define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" {
+define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-LABEL: f4:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp x1, #1
diff --git a/llvm/test/CodeGen/AArch64/sms-regpress.mir b/llvm/test/CodeGen/AArch64/sms-regpress.mir
new file mode 100644
index 0000000..c75eba5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-regpress.mir
@@ -0,0 +1,160 @@
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s
+
+# REQUIRES: asserts
+
+# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues.
+# The specific value of II is not important.
+
+# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
+# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}}
+# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
+# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}}
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+ define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr {
+ entry:
+ %0 = load double, ptr %a, align 8
+ %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8
+ %1 = load double, ptr %arrayidx1, align 8
+ %cmp133 = icmp sgt i32 %n, 0
+ br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup
+
+ for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext nneg i32 %n to i64
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.body, %entry
+ %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ]
+ ret double %res.0.lcssa
+
+ for.body: ; preds = %for.body.preheader, %for.body
+ %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+ %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ]
+ %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ]
+ %2 = load double, ptr %lsr.iv, align 8
+ %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0)
+ %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3)
+ %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4)
+ %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5)
+ %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6)
+ %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7)
+ %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8)
+ %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9)
+ %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10)
+ %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11)
+ %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12)
+ %14 = tail call double @llvm.fmuladd.f64(double %13, double %2, double %13)
+ %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14)
+ %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15)
+ %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16)
+ %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17)
+ %add = fadd double %17, %18
+ %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add)
+ %add35 = fadd double %10, %19
+ %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35)
+ %add38 = fadd double %11, %20
+ %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38)
+ %add41 = fadd double %12, %21
+ %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41)
+ %add44 = fadd double %14, %15
+ %add45 = fadd double %13, %add44
+ %add46 = fadd double %add45, %22
+ %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46)
+ %mul = fmul double %2, %7
+ %mul51 = fmul double %1, %mul
+ %24 = tail call double @llvm.fmuladd.f64(double %mul51, double %9, double %23)
+ %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24)
+ %add54 = fadd double %res.0135, %25
+ %scevgep = getelementptr i8, ptr %lsr.iv, i64 8
+ %lsr.iv.next = add nsw i64 %lsr.iv137, -1
+ %exitcond.not = icmp eq i64 %lsr.iv.next, 0
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+ }
+
+ declare double @llvm.fmuladd.f64(double, double, double)
+
+...
+---
+name: kernel
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0', virtual-reg: '%10' }
+ - { reg: '$x1', virtual-reg: '%11' }
+ - { reg: '$w2', virtual-reg: '%12' }
+body: |
+ bb.0.entry:
+ successors: %bb.1, %bb.4
+ liveins: $x0, $x1, $w2
+
+ %12:gpr32common = COPY $w2
+ %11:gpr64 = COPY $x1
+ %10:gpr64common = COPY $x0
+ dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv
+ Bcc 10, %bb.1, implicit $nzcv
+
+ bb.4:
+ %13:fpr64 = FMOVD0
+ B %bb.2
+
+ bb.1.for.body.preheader:
+ %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a)
+ %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1)
+ %16:gpr32 = ORRWrs $wzr, %12, 0
+ %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32
+ %15:fpr64 = FMOVD0
+ B %bb.3
+
+ bb.2.for.cond.cleanup:
+ %3:fpr64 = PHI %13, %bb.4, %7, %bb.3
+ $d0 = COPY %3
+ RET_ReallyLR implicit $d0
+
+ bb.3.for.body:
+ successors: %bb.2, %bb.3
+
+ %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3
+ %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3
+ %6:fpr64 = PHI %15, %bb.1, %7, %bb.3
+ early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv)
+ %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr
+ %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr
+ %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr
+ %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr
+ %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr
+ %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr
+ %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr
+ %26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr
+ %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr
+ %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr
+ %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr
+ %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr
+ %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr
+ %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr
+ %33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr
+ %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr
+ %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr
+ %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr
+ %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr
+ %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr
+ %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr
+ %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr
+ %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr
+ %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr
+ %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr
+ %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr
+ %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr
+ %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr
+ %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr
+ %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr
+ %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr
+ %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr
+ %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr
+ %8:gpr64all = COPY %17
+ %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv
+ %9:gpr64all = COPY %51
+ Bcc 0, %bb.2, implicit $nzcv
+ B %bb.3
+
+...
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll
index abeb4b3..4d755f4 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll
@@ -2,8 +2,6 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for vec
-
declare i4 @llvm.ssub.sat.i4(i4, i4)
declare i8 @llvm.ssub.sat.i8(i8, i8)
declare i16 @llvm.ssub.sat.i16(i16, i16)
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index d1f843a..a8c1276 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -2,28 +2,10 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for v16i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16
+; CHECK-GI: warning: Instruction selection used fallback path for v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>)
@@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
}
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
-; CHECK-LABEL: v32i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ret
%z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
}
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
-; CHECK-LABEL: v64i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v64i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v2.16b, v2.16b, v6.16b
+; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v5.16b
+; CHECK-SD-NEXT: sqsub v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v64i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT: sqsub v2.16b, v2.16b, v6.16b
+; CHECK-GI-NEXT: sqsub v3.16b, v3.16b, v7.16b
+; CHECK-GI-NEXT: ret
%z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
}
@@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
}
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
-; CHECK-LABEL: v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
%z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
}
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
-; CHECK-LABEL: v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h
-; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h
-; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v2.8h, v2.8h, v6.8h
+; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v4.8h
+; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: sqsub v3.8h, v3.8h, v7.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v5.8h
+; CHECK-GI-NEXT: sqsub v2.8h, v2.8h, v6.8h
+; CHECK-GI-NEXT: sqsub v3.8h, v3.8h, v7.8h
+; CHECK-GI-NEXT: ret
%z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
}
@@ -136,19 +146,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: shl v1.4h, v1.4h, #8
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: str s0, [x2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v0.b[3]
+; CHECK-GI-NEXT: mov b6, v1.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v1.b[2]
+; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
+; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
%x = load <4 x i8>, ptr %px
%y = load <4 x i8>, ptr %py
%z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -197,23 +230,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ld1 { v0.h }[0], [x0]
-; CHECK-NEXT: ld1 { v1.h }[0], [x1]
-; CHECK-NEXT: add x8, x0, #2
-; CHECK-NEXT: add x9, x1, #2
-; CHECK-NEXT: ld1 { v0.h }[2], [x8]
-; CHECK-NEXT: ld1 { v1.h }[2], [x9]
-; CHECK-NEXT: shl v1.2s, v1.2s, #16
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
-; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ushr v0.2s, v0.2s, #16
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [x2]
-; CHECK-NEXT: strh w8, [x2, #2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0]
+; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1]
+; CHECK-SD-NEXT: add x8, x0, #2
+; CHECK-SD-NEXT: add x9, x1, #2
+; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9]
+; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [x2]
+; CHECK-SD-NEXT: strh w8, [x2, #2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: ldr h2, [x1]
+; CHECK-GI-NEXT: ldr h3, [x1, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v3.h[0]
+; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: str h0, [x2]
+; CHECK-GI-NEXT: str h1, [x2, #2]
+; CHECK-GI-NEXT: ret
%x = load <2 x i16>, ptr %px
%y = load <2 x i16>, ptr %py
%z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
@@ -231,15 +278,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
}
define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v12i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: sqsub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: sqsub v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: str q0, [x2]
-; CHECK-NEXT: str d1, [x2, #16]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v12i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldp q0, q3, [x1]
+; CHECK-SD-NEXT: ldp q1, q2, [x0]
+; CHECK-SD-NEXT: sqsub v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: sqsub v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: str d1, [x2, #16]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v12i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: sqsub v1.4h, v2.4h, v3.4h
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: str d1, [x2, #16]
+; CHECK-GI-NEXT: ret
%x = load <12 x i16>, ptr %px
%y = load <12 x i16>, ptr %py
%z = call <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
@@ -349,23 +408,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
}
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
-; CHECK-LABEL: v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: sqsub v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: sqsub v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: sqsub v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: ret
%z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
}
@@ -380,23 +453,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
}
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
-; CHECK-LABEL: v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
}
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sqsub v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: sqsub v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: sqsub v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: sqsub v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
%z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z
}
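Across the ssub_sat_vec.ll hunks above, the SelectionDAG and GlobalISel outputs differ only in instruction order: both legalize the 256- and 512-bit vectors into pairs and quads of 128-bit sqsub. To replay one of the newly selected shapes outside the patch, a minimal standalone sketch (the function name is illustrative, not from the test) is:

; Sketch: run with llc -mtriple=aarch64-- -global-isel -global-isel-abort=2,
; mirroring the RUN line of the test; this shape no longer hits the fallback.
define <32 x i8> @ssub_sat_sketch(<32 x i8> %x, <32 x i8> %y) {
  %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
  ret <32 x i8> %z
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>)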
diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
new file mode 100644
index 0000000..bcfc7b3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: shl v0.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ret
+ %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0
+ ret <4 x i32> %load
+}
+
+define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: shl v1.4s, v1.4s, #31
+; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0
+ ret void
+}
+
+define <4 x i32> @load_v4i32(ptr %a) nounwind {
+; CHECK-LABEL: load_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ret
+ %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> undef), !nontemporal !0
+ ret <4 x i32> %load
+}
+
+define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind {
+; CHECK-LABEL: store_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>), !nontemporal !0
+ ret void
+}
+
+define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ret
+ %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef), !nontemporal !0
+ ret <vscale x 4 x i32> %load
+}
+
+define void @masked_store_nxv4i32(<vscale x 4 x i32> %x, ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_store_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> %x, ptr %a, i32 1, <vscale x 4 x i1> %mask), !nontemporal !0
+ ret void
+}
+
+declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32>, ptr, i32, <vscale x 4 x i1>)
+declare <4 x i32> @llvm.masked.load.v4i32(ptr, i32, <4 x i1>, <4 x i32>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
+
+!0 = !{i32 1}
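One detail worth noting in the new test: the !nontemporal hint is effectively dropped for the masked forms; the CHECK lines expect ordinary predicated ld1w/st1w rather than a nontemporal variant, and the all-true fixed-width masks fold to plain ldr/str. The metadata shape the test depends on is the generic one (minimal sketch, not part of the patch):

; !nontemporal must reference a metadata node holding a single i32 1.
define void @nt_store_sketch(<4 x i32> %v, ptr %p) {
  store <4 x i32> %v, ptr %p, align 4, !nontemporal !0
  ret void
}
!0 = !{i32 1}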
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index f0bbed5..30ff700 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -2,28 +2,10 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for v16i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16
+; CHECK-GI: warning: Instruction selection used fallback path for v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>)
@@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
}
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
-; CHECK-LABEL: v32i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ret
%z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
}
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
-; CHECK-LABEL: v64i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v64i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v2.16b, v2.16b, v6.16b
+; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v5.16b
+; CHECK-SD-NEXT: uqadd v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v64i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT: uqadd v2.16b, v2.16b, v6.16b
+; CHECK-GI-NEXT: uqadd v3.16b, v3.16b, v7.16b
+; CHECK-GI-NEXT: ret
%z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
}
@@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
}
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
-; CHECK-LABEL: v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
%z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
}
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
-; CHECK-LABEL: v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h
-; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h
-; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v2.8h, v2.8h, v6.8h
+; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v4.8h
+; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: uqadd v3.8h, v3.8h, v7.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v5.8h
+; CHECK-GI-NEXT: uqadd v2.8h, v2.8h, v6.8h
+; CHECK-GI-NEXT: uqadd v3.8h, v3.8h, v7.8h
+; CHECK-GI-NEXT: ret
%z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
}
@@ -135,16 +145,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s1, [x0]
-; CHECK-NEXT: ldr s2, [x1]
-; CHECK-NEXT: movi d0, #0xff00ff00ff00ff
-; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b
-; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: str s0, [x2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s1, [x0]
+; CHECK-SD-NEXT: ldr s2, [x1]
+; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff
+; CHECK-SD-NEXT: uaddl v1.8h, v1.8b, v2.8b
+; CHECK-SD-NEXT: umin v0.4h, v1.4h, v0.4h
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v0.b[3]
+; CHECK-GI-NEXT: mov b6, v1.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v1.b[2]
+; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
+; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
%x = load <4 x i8>, ptr %px
%y = load <4 x i8>, ptr %py
%z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -194,24 +227,38 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-NEXT: ldrh w10, [x0, #2]
-; CHECK-NEXT: ldrh w11, [x1, #2]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: mov v1.s[1], w11
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [x2]
-; CHECK-NEXT: strh w8, [x2, #2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: ldrh w9, [x1]
+; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-NEXT: ldrh w10, [x0, #2]
+; CHECK-SD-NEXT: ldrh w11, [x1, #2]
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: fmov s1, w9
+; CHECK-SD-NEXT: mov v0.s[1], w10
+; CHECK-SD-NEXT: mov v1.s[1], w11
+; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [x2]
+; CHECK-SD-NEXT: strh w8, [x2, #2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: ldr h2, [x1]
+; CHECK-GI-NEXT: ldr h3, [x1, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v3.h[0]
+; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: str h0, [x2]
+; CHECK-GI-NEXT: str h1, [x2, #2]
+; CHECK-GI-NEXT: ret
%x = load <2 x i16>, ptr %px
%y = load <2 x i16>, ptr %py
%z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
@@ -229,15 +276,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
}
define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v12i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: uqadd v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: uqadd v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: str q0, [x2]
-; CHECK-NEXT: str d1, [x2, #16]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v12i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldp q0, q3, [x1]
+; CHECK-SD-NEXT: ldp q1, q2, [x0]
+; CHECK-SD-NEXT: uqadd v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: uqadd v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: str d1, [x2, #16]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v12i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uqadd v1.4h, v2.4h, v3.4h
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: str d1, [x2, #16]
+; CHECK-GI-NEXT: ret
%x = load <12 x i16>, ptr %px
%y = load <12 x i16>, ptr %py
%z = call <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
@@ -336,23 +395,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
}
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
-; CHECK-LABEL: v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: uqadd v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: uqadd v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: uqadd v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: ret
%z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
}
@@ -367,23 +440,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
}
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
-; CHECK-LABEL: v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
}
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqadd v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: uqadd v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: uqadd v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: uqadd v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
%z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z
}
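The shortened warning list at the top of uadd_sat_vec.ll is the flip side of the new CHECK-GI blocks: only the irregular shapes (v2i8, v12i8, v16i4, v16i1, v2i128) still take the fallback path under -global-isel-abort=2. A minimal standalone reproducer for one of them, as a sketch, is:

; v2i8 still falls back to SelectionDAG; expect the fallback warning on stderr.
define <2 x i8> @uadd_sat_v2i8_sketch(<2 x i8> %x, <2 x i8> %y) {
  %z = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x, <2 x i8> %y)
  ret <2 x i8> %z
}
declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)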
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 82c0327..3bc2796 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -2,28 +2,10 @@
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for v16i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16
+; CHECK-GI: warning: Instruction selection used fallback path for v2i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128
declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>)
@@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
}
define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
-; CHECK-LABEL: v32i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ret
%z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
ret <32 x i8> %z
}
define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
-; CHECK-LABEL: v64i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v64i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v2.16b, v2.16b, v6.16b
+; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v5.16b
+; CHECK-SD-NEXT: uqsub v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v64i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v5.16b
+; CHECK-GI-NEXT: uqsub v2.16b, v2.16b, v6.16b
+; CHECK-GI-NEXT: uqsub v3.16b, v3.16b, v7.16b
+; CHECK-GI-NEXT: ret
%z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y)
ret <64 x i8> %z
}
@@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
}
define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
-; CHECK-LABEL: v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
%z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
ret <16 x i16> %z
}
define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind {
-; CHECK-LABEL: v32i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h
-; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h
-; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h
-; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v32i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v2.8h, v2.8h, v6.8h
+; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v4.8h
+; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v5.8h
+; CHECK-SD-NEXT: uqsub v3.8h, v3.8h, v7.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v32i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v4.8h
+; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v5.8h
+; CHECK-GI-NEXT: uqsub v2.8h, v2.8h, v6.8h
+; CHECK-GI-NEXT: uqsub v3.8h, v3.8h, v7.8h
+; CHECK-GI-NEXT: ret
%z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y)
ret <32 x i16> %z
}
@@ -136,16 +146,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr s1, [x1]
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: str s0, [x2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldr s0, [x0]
+; CHECK-SD-NEXT: ldr s1, [x1]
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uqsub v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: str s0, [x2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: ldr w9, [x1]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov b2, v0.b[1]
+; CHECK-GI-NEXT: mov b3, v1.b[1]
+; CHECK-GI-NEXT: mov b4, v0.b[2]
+; CHECK-GI-NEXT: mov b5, v0.b[3]
+; CHECK-GI-NEXT: mov b6, v1.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov b2, v1.b[2]
+; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
+; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: str w8, [x2]
+; CHECK-GI-NEXT: ret
%x = load <4 x i8>, ptr %px
%y = load <4 x i8>, ptr %py
%z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -193,22 +226,36 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
}
define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ldrh w9, [x1]
-; CHECK-NEXT: ldrh w10, [x0, #2]
-; CHECK-NEXT: ldrh w11, [x1, #2]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: mov v1.s[1], w11
-; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [x2]
-; CHECK-NEXT: strh w8, [x2, #2]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldrh w8, [x0]
+; CHECK-SD-NEXT: ldrh w9, [x1]
+; CHECK-SD-NEXT: ldrh w10, [x0, #2]
+; CHECK-SD-NEXT: ldrh w11, [x1, #2]
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: fmov s1, w9
+; CHECK-SD-NEXT: mov v0.s[1], w10
+; CHECK-SD-NEXT: mov v1.s[1], w11
+; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [x2]
+; CHECK-SD-NEXT: strh w8, [x2, #2]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr h0, [x0]
+; CHECK-GI-NEXT: ldr h1, [x0, #2]
+; CHECK-GI-NEXT: ldr h2, [x1]
+; CHECK-GI-NEXT: ldr h3, [x1, #2]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v3.h[0]
+; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v2.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: str h0, [x2]
+; CHECK-GI-NEXT: str h1, [x2, #2]
+; CHECK-GI-NEXT: ret
%x = load <2 x i16>, ptr %px
%y = load <2 x i16>, ptr %py
%z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y)
@@ -226,15 +273,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind {
}
define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind {
-; CHECK-LABEL: v12i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: uqsub v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: uqsub v1.8h, v2.8h, v3.8h
-; CHECK-NEXT: str q0, [x2]
-; CHECK-NEXT: str d1, [x2, #16]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v12i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ldp q0, q3, [x1]
+; CHECK-SD-NEXT: ldp q1, q2, [x0]
+; CHECK-SD-NEXT: uqsub v0.8h, v1.8h, v0.8h
+; CHECK-SD-NEXT: uqsub v1.8h, v2.8h, v3.8h
+; CHECK-SD-NEXT: str q0, [x2]
+; CHECK-SD-NEXT: str d1, [x2, #16]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v12i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: uqsub v1.4h, v2.4h, v3.4h
+; CHECK-GI-NEXT: str q0, [x2]
+; CHECK-GI-NEXT: str d1, [x2, #16]
+; CHECK-GI-NEXT: ret
%x = load <12 x i16>, ptr %px
%y = load <12 x i16>, ptr %py
%z = call <12 x i16> @llvm.usub.sat.v12i16(<12 x i16> %x, <12 x i16> %y)
@@ -334,23 +393,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
}
define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
%z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
ret <8 x i32> %z
}
define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind {
-; CHECK-LABEL: v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: uqsub v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: uqsub v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: uqsub v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: ret
%z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y)
ret <16 x i32> %z
}
@@ -365,23 +438,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
}
define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
-; CHECK-LABEL: v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
%z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
ret <4 x i64> %z
}
define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uqsub v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: uqsub v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: uqsub v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: uqsub v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
%z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %z
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index c25b0f2..78d9084 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -16,7 +16,6 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB0_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%c = icmp ne i32 %value, 0
@@ -44,7 +43,6 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB1_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%c = icmp ne i32 %value, 0
@@ -74,7 +72,6 @@ define i32 @divergent_if_nonboolean_condition0(i32 %value) {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB2_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%c = trunc i32 %value to i1
@@ -106,7 +103,6 @@ define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) {
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: .LBB3_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%value = load i32, ptr addrspace(1) %ptr
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 303dc46..5c22d5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -131,8 +131,6 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) {
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB1_2: ; %bb1
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call i1 @llvm.amdgcn.is.private(ptr %ptr)
br i1 %val, label %bb0, label %bb1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 63702d2..e005c38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -131,8 +131,6 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB1_2: ; %bb1
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
%val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
br i1 %val, label %bb0, label %bb1
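Both llvm.amdgcn.is.private.ll and llvm.amdgcn.is.shared.ll lose the same two GFX11 instructions: the s_nop / s_sendmsg(MSG_DEALLOC_VGPRS) pair that used to precede s_endpgm on the branch-around path. The kernels under test share this control-flow shape (sketch; only the intrinsic call and the branch come from the test, the store and block names are illustrative):

declare i1 @llvm.amdgcn.is.shared(ptr)

define amdgpu_kernel void @is_shared_sketch(ptr %p) {
  %val = call i1 @llvm.amdgcn.is.shared(ptr %p)
  br i1 %val, label %bb0, label %bb1
bb0:                                    ; taken when %p points into LDS
  store volatile i32 0, ptr %p
  br label %bb1
bb1:
  ret void
}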
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 352adac..af6f6913 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -39,9 +39,9 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
; GFX7LESS-NEXT: v_mad_u32_u24 v0, v0, 5, s4
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -65,11 +65,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB0_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s4
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -92,11 +92,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB0_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s4
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -253,8 +253,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, i32 %additive)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0
; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -504,11 +504,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB2_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -544,11 +544,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB2_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_add_u32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -944,7 +944,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2
@@ -952,6 +951,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5
; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -974,7 +974,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: .LBB4_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-NEXT: v_readfirstlane_b32 s3, v1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1006,7 +1005,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: .LBB4_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v0
; GFX9-NEXT: v_readfirstlane_b32 s3, v1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -1219,11 +1217,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: .LBB5_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mul_lo_u32 v3, s3, v2
; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1]
; GFX8-NEXT: s_mov_b32 s7, 0xf000
@@ -1258,11 +1256,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive)
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: .LBB5_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, v[0:1]
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
@@ -1530,10 +1528,10 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v0
; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -1557,12 +1555,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB7_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v1
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -1585,12 +1583,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB7_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -1751,8 +1749,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, i32 %subitive)
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0
; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -2006,11 +2004,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB9_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -2046,11 +2044,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB9_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_sub_u32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -2446,7 +2444,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2
@@ -2454,6 +2451,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5
; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -2477,7 +2475,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB11_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2
@@ -2487,6 +2484,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -2509,7 +2507,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB11_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v2
@@ -2519,6 +2516,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -3081,11 +3079,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB14_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_and_b32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -3121,11 +3119,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB14_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_and_b32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -3355,11 +3353,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB15_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_or_b32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -3395,11 +3393,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB15_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_or_b32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -3629,11 +3627,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB16_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_xor_b32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -3669,11 +3667,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB16_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_xor_b32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -3903,11 +3901,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB17_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_max_i32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -3943,11 +3941,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB17_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_max_i32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -4151,7 +4149,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, 1
@@ -4162,6 +4159,7 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4
; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -4182,7 +4180,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: .LBB18_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_bfrev_b32_e32 v0, 1
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -4216,7 +4213,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: .LBB18_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_bfrev_b32_e32 v0, 1
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -4419,11 +4415,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB19_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_min_i32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -4459,11 +4455,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB19_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_min_i32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -4667,7 +4663,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, -2
@@ -4678,6 +4673,7 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4
; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -4698,7 +4694,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: .LBB20_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_bfrev_b32_e32 v0, -2
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -4732,7 +4727,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: .LBB20_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_bfrev_b32_e32 v0, -2
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
@@ -4935,11 +4929,11 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB21_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_max_u32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -4975,11 +4969,11 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB21_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_max_u32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -5183,7 +5177,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
@@ -5193,6 +5186,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5
; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -5214,7 +5208,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB22_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0
@@ -5226,6 +5219,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -5246,7 +5240,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB22_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
@@ -5258,6 +5251,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -5446,11 +5440,11 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB23_4:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_min_u32_e32 v0, s4, v1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -5486,11 +5480,11 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB23_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_min_u32_e32 v0, s4, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -5694,7 +5688,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000
; GFX7LESS-NEXT: s_mov_b32 s2, -1
-; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0
; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1
; GFX7LESS-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
@@ -5704,6 +5697,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4
; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7LESS-NEXT: s_endpgm
;
@@ -5725,7 +5719,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: .LBB24_2:
; GFX8-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
@@ -5737,6 +5730,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
@@ -5757,7 +5751,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: .LBB24_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
@@ -5769,6 +5762,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) {
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index 19a1d2d9..c9076a9 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -186,7 +186,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v0, v3
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 9865883..bf4302c 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v31, off, s32
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_clause 0x4
-; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0
-; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: s_add_i32 s3, s0, 0x50
-; GFX11-NEXT: s_add_i32 s0, s0, 48
+; GFX11-NEXT: s_clause 0x5
+; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1
-; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2
-; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3
-; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
@@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54
; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58
; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_add_i32 s1, s0, 0xf0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s3, s0, 0xd0
-; GFX11-NEXT: s_add_i32 s4, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s5, s0, 0xb0
-; GFX11-NEXT: s_add_i32 s6, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s7, s0, 0x90
-; GFX11-NEXT: s_add_i32 s8, s0, 0x70
-; GFX11-NEXT: s_add_i32 s9, s0, 0x60
-; GFX11-NEXT: s_add_i32 s10, s0, 0x50
-; GFX11-NEXT: s_add_i32 s11, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(31)
; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3
; GFX11-NEXT: s_waitcnt vmcnt(30)
@@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) {
; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5
; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2
; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37
-; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1
-; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2
-; GFX11-NEXT: scratch_store_b128 off, v[80:83], s3
-; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4
-; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7
-; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11
-; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
+; GFX11-NEXT: s_clause 0xf
+; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240
+; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224
+; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208
+; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192
+; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176
+; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160
+; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144
+; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
%load = load <32 x bfloat>, ptr addrspace(1) %ptr
%fpext = fpext <32 x bfloat> %load to <32 x double>
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index ac50fb8..da609bf 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -41,7 +41,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: .LBB0_2: ; %endif
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0000
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v1, v0, s[0:1] offset:2300
; GCN-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index 069c57e..6dabd8c 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -103,7 +103,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1)
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: .LBB0_4: ; %exit
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0]
; GFX9-NEXT: s_movk_i32 s4, 0x8000
; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
@@ -131,7 +130,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1)
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB0_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -266,7 +264,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(
; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: .LBB1_4: ; %exit
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,1]
; GFX9-NEXT: s_movk_i32 s4, 0x8000
; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0
@@ -294,7 +291,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace(
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB1_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -431,7 +427,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
; GFX9-NEXT: .LBB2_4: ; %exit
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900
; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00
-; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_ge_f16_e32 vcc, 0.5, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0x3800
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
@@ -461,7 +456,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB2_4: ; %exit
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -665,7 +659,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB3_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -871,7 +864,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB4_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -1081,7 +1073,6 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB5_4: ; %exit
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -1432,7 +1423,6 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB7_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5
; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900
; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00
@@ -1724,7 +1714,6 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add
; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: .LBB8_4: ; %exit
-; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v5
; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900
; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00
diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll
index db89ad6..3b2f15c 100644
--- a/llvm/test/CodeGen/AMDGPU/function-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-args.ll
@@ -114,7 +114,6 @@ define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: .LBB3_2: ; %bb2
; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5]
-; CIGFX89-NEXT: s_waitcnt vmcnt(0)
; CIGFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index acadee2..401cbce 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -1561,34 +1561,28 @@ define <33 x i32> @v33i32_func_void() #0 {
; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
-; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16
-; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0
-; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32
+; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
+; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
+; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: s_add_i32 s3, s0, 0x50
-; GFX11-NEXT: s_add_i32 s4, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(8)
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT: s_waitcnt vmcnt(6)
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT: s_waitcnt vmcnt(4)
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128
+; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load <33 x i32>, ptr addrspace(1) %ptr
@@ -1850,34 +1844,28 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
-; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16
-; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0
-; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32
+; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
+; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
+; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: s_add_i32 s3, s0, 0x50
-; GFX11-NEXT: s_add_i32 s4, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(8)
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96
; GFX11-NEXT: s_waitcnt vmcnt(6)
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80
; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64
; GFX11-NEXT: s_waitcnt vmcnt(4)
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48
; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32
; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128
+; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr
@@ -2143,33 +2131,24 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144
; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128
; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: s_add_i32 s1, s0, 0xf0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s3, s0, 0xd0
-; GFX11-NEXT: s_add_i32 s4, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s5, s0, 0xb0
-; GFX11-NEXT: s_add_i32 s6, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s7, s0, 0x90
; GFX11-NEXT: s_waitcnt vmcnt(8)
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240
; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224
; GFX11-NEXT: s_waitcnt vmcnt(6)
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208
; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s4
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192
; GFX11-NEXT: s_waitcnt vmcnt(4)
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s5
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176
; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s6
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160
; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s7
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b32 off, v33, s0
+; GFX11-NEXT: scratch_store_b32 v0, v33, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
%ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
%val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index c1d6826..3b078c4 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 {
; GFX11-NEXT: s_mov_b32 s2, s0
; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
-; GFX11-NEXT: s_clause 0x7
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:128
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x790
-; GFX11-NEXT: s_add_i32 s2, s0, 0x780
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x770
-; GFX11-NEXT: s_add_i32 s2, s0, 0x760
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x750
-; GFX11-NEXT: s_add_i32 s2, s0, 0x740
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x730
-; GFX11-NEXT: s_add_i32 s2, s0, 0x720
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x710
-; GFX11-NEXT: s_add_i32 s2, s0, 0x700
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x690
-; GFX11-NEXT: s_add_i32 s2, s0, 0x680
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x670
-; GFX11-NEXT: s_add_i32 s2, s0, 0x660
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x650
-; GFX11-NEXT: s_add_i32 s2, s0, 0x640
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x630
-; GFX11-NEXT: s_add_i32 s2, s0, 0x620
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x610
-; GFX11-NEXT: s_add_i32 s2, s0, 0x600
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x590
-; GFX11-NEXT: s_add_i32 s2, s0, 0x580
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x570
-; GFX11-NEXT: s_add_i32 s2, s0, 0x560
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x550
-; GFX11-NEXT: s_add_i32 s2, s0, 0x540
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x530
-; GFX11-NEXT: s_add_i32 s2, s0, 0x520
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x510
-; GFX11-NEXT: s_add_i32 s2, s0, 0x500
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x490
-; GFX11-NEXT: s_add_i32 s2, s0, 0x480
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x470
-; GFX11-NEXT: s_add_i32 s2, s0, 0x460
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x450
-; GFX11-NEXT: s_add_i32 s2, s0, 0x440
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x430
-; GFX11-NEXT: s_add_i32 s2, s0, 0x420
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x410
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3d0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x390
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x380
-; GFX11-NEXT: s_add_i32 s2, s0, 0x370
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x360
-; GFX11-NEXT: s_add_i32 s2, s0, 0x350
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x340
-; GFX11-NEXT: s_add_i32 s2, s0, 0x330
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x320
-; GFX11-NEXT: s_add_i32 s2, s0, 0x310
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x300
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x290
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x280
-; GFX11-NEXT: s_add_i32 s2, s0, 0x270
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x260
-; GFX11-NEXT: s_add_i32 s2, s0, 0x250
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x240
-; GFX11-NEXT: s_add_i32 s2, s0, 0x230
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x220
-; GFX11-NEXT: s_add_i32 s2, s0, 0x210
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x190
-; GFX11-NEXT: s_add_i32 s2, s0, 0x180
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x170
-; GFX11-NEXT: s_add_i32 s2, s0, 0x160
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x150
-; GFX11-NEXT: s_add_i32 s2, s0, 0x140
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x130
-; GFX11-NEXT: s_add_i32 s2, s0, 0x120
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x110
-; GFX11-NEXT: s_add_i32 s2, s0, 0xf0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xd0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s2, s0, 0xb0
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x90
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x70
-; GFX11-NEXT: s_add_i32 s2, s0, 0x60
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2
-; GFX11-NEXT: s_add_i32 s1, s0, 0x50
-; GFX11-NEXT: s_add_i32 s0, s0, 48
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1776
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1728
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1536
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1264
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:832
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:752
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512
+; GFX11-NEXT: s_clause 0x1f
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
ret <512 x i32> zeroinitializer
@@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
; GFX11-LABEL: return_72xi32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_clause 0xc
; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212
; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208
@@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172
; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168
; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164
-; GFX11-NEXT: s_clause 0x14
-; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32
-; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28
-; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24
-; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48
-; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44
-; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40
-; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64
-; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60
-; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56
-; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80
-; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76
-; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72
-; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96
-; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92
-; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88
-; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112
-; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108
-; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104
-; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128
-; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124
-; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64
+; GFX11-NEXT: s_clause 0x11
+; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16
+; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12
+; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8
+; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32
+; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28
+; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24
+; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48
+; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64
+; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60
+; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56
+; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80
+; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76
+; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72
+; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96
+; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92
+; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88
+; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112
+; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108
+; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104
+; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64
; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144
-; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140
-; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32
+; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128
+; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124
+; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120
+; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48
; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:160
-; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156
-; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16
+; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144
+; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140
+; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136
+; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32
; GFX11-NEXT: s_clause 0xd
-; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16
-; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12
-; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8
-; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4
-; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148
-; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:132
-; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116
-; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100
-; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84
-; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68
-; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52
-; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36
-; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160
+; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156
+; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152
+; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148
+; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132
+; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116
+; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100
+; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84
+; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68
+; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52
+; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36
+; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20
+; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4
; GFX11-NEXT: scratch_load_b32 v32, off, s32
-; GFX11-NEXT: s_add_i32 s1, s0, 0x110
-; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0
-; GFX11-NEXT: s_add_i32 s2, s0, 0x100
-; GFX11-NEXT: s_add_i32 s3, s0, 0xf0
-; GFX11-NEXT: s_add_i32 s34, s0, 0xe0
-; GFX11-NEXT: s_add_i32 s35, s0, 0xd0
-; GFX11-NEXT: s_add_i32 s36, s0, 0xc0
-; GFX11-NEXT: s_add_i32 s37, s0, 0xb0
-; GFX11-NEXT: s_add_i32 s38, s0, 0xa0
-; GFX11-NEXT: s_add_i32 s39, s0, 0x90
-; GFX11-NEXT: s_add_i32 s40, s0, 0x70
-; GFX11-NEXT: s_add_i32 s41, s0, 0x60
-; GFX11-NEXT: s_add_i32 s42, s0, 0x50
-; GFX11-NEXT: s_add_i32 s43, s0, 48
; GFX11-NEXT: s_waitcnt vmcnt(10)
-; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128
+; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272
; GFX11-NEXT: s_waitcnt vmcnt(9)
-; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1
+; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256
; GFX11-NEXT: s_waitcnt vmcnt(8)
-; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2
+; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240
; GFX11-NEXT: s_waitcnt vmcnt(7)
-; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3
+; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224
; GFX11-NEXT: s_waitcnt vmcnt(6)
-; GFX11-NEXT: scratch_store_b128 off, v[56:59], s34
+; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208
; GFX11-NEXT: s_waitcnt vmcnt(5)
-; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35
+; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192
; GFX11-NEXT: s_waitcnt vmcnt(4)
-; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36
+; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176
; GFX11-NEXT: s_waitcnt vmcnt(3)
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37
+; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160
; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38
+; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144
; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39
+; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40
-; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41
-; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42
-; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43
+; GFX11-NEXT: s_clause 0x3
+; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112
+; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96
+; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off
; GFX11-NEXT: s_clause 0xc
; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164
; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168
@@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-LABEL: call_72xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s46, s33
+; GFX11-NEXT: s_mov_b32 s34, s33
; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
@@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: s_add_i32 s0, s32, 32
; GFX11-NEXT: s_add_i32 s1, s32, 16
+; GFX11-NEXT: s_add_i32 s2, s33, 0x200
+; GFX11-NEXT: v_writelane_b32 v60, s30, 0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
-; GFX11-NEXT: s_add_i32 s0, s33, 0x200
-; GFX11-NEXT: v_writelane_b32 v60, s30, 0
-; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
@@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
-; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi
-; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo
+; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
+; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45]
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624
; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640
-; GFX11-NEXT: s_add_i32 s0, s32, 0xa0
+; GFX11-NEXT: s_add_i32 s2, s32, 0xa0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: v_mov_b32_e32 v32, v48
; GFX11-NEXT: s_clause 0x9
@@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9
; GFX11-NEXT: v_mov_b32_e32 v9, v20
-; GFX11-NEXT: scratch_store_b32 off, v11, s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x90
+; GFX11-NEXT: scratch_store_b32 off, v11, s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x90
; GFX11-NEXT: v_mov_b32_e32 v11, v22
-; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x80
+; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x80
; GFX11-NEXT: v_mov_b32_e32 v5, v16
-; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
+; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2
; GFX11-NEXT: v_mov_b32_e32 v0, 24
-; GFX11-NEXT: s_add_i32 s0, s32, 0x70
+; GFX11-NEXT: s_add_i32 s2, s32, 0x70
; GFX11-NEXT: v_mov_b32_e32 v6, v17
-; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0
+; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2
; GFX11-NEXT: v_mov_b32_e32 v13, v24
-; GFX11-NEXT: s_add_i32 s0, s32, 0x6c
+; GFX11-NEXT: s_add_i32 s2, s32, 0x6c
; GFX11-NEXT: v_mov_b32_e32 v7, v18
-; GFX11-NEXT: scratch_store_b32 off, v0, s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x60
+; GFX11-NEXT: scratch_store_b32 off, v0, s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x60
; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
-; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 0x50
+; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 0x50
; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
-; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 64
+; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 64
; GFX11-NEXT: v_mov_b32_e32 v14, v25
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 48
+; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 48
; GFX11-NEXT: v_mov_b32_e32 v16, v27
-; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 32
+; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 32
; GFX11-NEXT: v_mov_b32_e32 v30, v46
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0
-; GFX11-NEXT: s_add_i32 s0, s32, 16
-; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0
+; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
+; GFX11-NEXT: s_add_i32 s2, s32, 16
+; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, 42
@@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572
; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556
; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540
-; GFX11-NEXT: s_add_i32 s0, s33, 0x400
+; GFX11-NEXT: s_add_i32 s2, s33, 0x400
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_mov_b32_e32 v0, s0
-; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45]
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_clause 0xb
; GFX11-NEXT: scratch_load_b32 v59, off, s33
; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4
@@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0xf600
-; GFX11-NEXT: s_mov_b32 s33, s46
+; GFX11-NEXT: s_mov_b32 s33, s34
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
index 433a836..3b3e107 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
@@ -33,7 +33,7 @@ define void @func_use_lds_global() {
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
-; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8
; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -103,7 +103,7 @@ define void @func_use_lds_global_constexpr_cast() {
; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-SDAG-NEXT: s_trap 2
@@ -171,7 +171,7 @@ define void @func_uses_lds_multi(i1 %cond) {
; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_2
; GFX8-SDAG-NEXT: ; %bb.1: ; %bb1
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 1
-; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8
; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -181,7 +181,7 @@ define void @func_uses_lds_multi(i1 %cond) {
; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_4
; GFX8-SDAG-NEXT: ; %bb.3: ; %bb0
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
-; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8
; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -189,7 +189,7 @@ define void @func_uses_lds_multi(i1 %cond) {
; GFX8-SDAG-NEXT: .LBB2_4: ; %ret
; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 2
-; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8
; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -379,7 +379,7 @@ define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) {
; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
; GFX8-SDAG-NEXT: ds_write_b32 v0, v2
-; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8
; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 1
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -472,7 +472,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GFX8-SDAG-NEXT: ; %bb.1: ; %use.bb
; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
-; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0
+; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8
; GFX8-SDAG-NEXT: ds_write_b32 v0, v0
; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
@@ -481,7 +481,6 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX8-SDAG-NEXT: .LBB4_2: ; %ret
; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: func_uses_lds_phi_after:
@@ -506,7 +505,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: .LBB4_2: ; %ret
; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: func_uses_lds_phi_after:
@@ -527,7 +526,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: .LBB4_2: ; %ret
; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: func_uses_lds_phi_after:
@@ -548,7 +547,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: .LBB4_2: ; %ret
; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-LABEL: func_uses_lds_phi_after:
@@ -570,7 +569,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: .LBB4_3: ; %ret
; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
-; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: s_setpc_b64 s[30:31]
; SDAG-NEXT: .LBB4_4:
; SDAG-NEXT: s_endpgm
@@ -594,7 +593,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) {
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: .LBB4_3: ; %ret
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB4_4:
; GISEL-NEXT: s_endpgm
@@ -616,6 +615,3 @@ ret:
; CHECK: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll
index 5e76dfd..4477f02 100644
--- a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll
@@ -157,7 +157,6 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) %
; VI-NEXT: .LBB2_2:
; VI-NEXT: s_or_b64 exec, exec, s[6:7]
; VI-NEXT: s_mov_b64 s[6:7], exec
-; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_readfirstlane_b32 s8, v1
; VI-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0
; VI-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1
@@ -203,15 +202,14 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) %
; VI-NEXT: ; %bb.7:
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_mov_b32 m0, -1
-; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: ds_add_rtn_f32 v2, v2, v1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: .LBB2_8:
; VI-NEXT: s_or_b64 exec, exec, s[4:5]
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_readfirstlane_b32 s2, v2
; VI-NEXT: v_add_f32_e32 v2, s2, v0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: flat_store_dword v[0:1], v2
@@ -240,7 +238,6 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) %
; GFX9-NEXT: .LBB2_2:
; GFX9-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX9-NEXT: s_mov_b64 s[6:7], exec
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s8, v1
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0
; GFX9-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1
@@ -285,16 +282,15 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) %
; GFX9-NEXT: s_cbranch_execz .LBB2_8
; GFX9-NEXT: ; %bb.7:
; GFX9-NEXT: v_mov_b32_e32 v2, s2
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_add_rtn_f32 v2, v2, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: .LBB2_8:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s2, v2
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_add_f32_e32 v0, s2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 138dd53..d19ef75 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1260,8 +1260,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB11_5: ; %end
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB11_6:
; GFX11-NEXT: s_mov_b64 exec, 0
@@ -1525,8 +1523,6 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB13_6:
; GFX11-NEXT: s_mov_b64 exec, 0
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
index eef5f57..ecebbb9 100644
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -32,7 +32,7 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; GCN-NEXT: S_WAITCNT_soft 3952
+ ; GCN-NEXT: S_WAITCNT 3952
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
entry:
@@ -79,7 +79,7 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a,
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
- ; GCN-NEXT: S_WAITCNT_soft 3952
+ ; GCN-NEXT: S_WAITCNT 3952
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.5:
entry:
diff --git a/llvm/test/CodeGen/Generic/allow-check.ll b/llvm/test/CodeGen/Generic/allow-check.ll
index 43dab68..a084889 100644
--- a/llvm/test/CodeGen/Generic/allow-check.ll
+++ b/llvm/test/CodeGen/Generic/allow-check.ll
@@ -2,6 +2,7 @@
; REQUIRES: host-byteorder-little-endian
; -global-isel=1 is unsupported.
+; XFAIL: target=loongarch{{.*}}
; XFAIL: target=nvptx{{.*}}
; XFAIL: target=sparc{{.*}}
; XFAIL: target=hexagon-{{.*}}
diff --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
index b7f8b8a..8980049 100644
--- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=BE
;RUN: llc < %s --mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=LE
+;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -ppc-gather-alias-max-depth=0 | FileCheck %s -check-prefix=FORWARD
define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
; BE-LABEL: test_large_vec_vaarg:
@@ -35,6 +36,22 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
; LE-NEXT: lxvd2x 0, 0, 3
; LE-NEXT: xxswapd 35, 0
; LE-NEXT: blr
+;
+; FORWARD-LABEL: test_large_vec_vaarg:
+; FORWARD: # %bb.0:
+; FORWARD-NEXT: ld 3, -8(1)
+; FORWARD-NEXT: addi 3, 3, 15
+; FORWARD-NEXT: rldicr 3, 3, 0, 59
+; FORWARD-NEXT: addi 4, 3, 16
+; FORWARD-NEXT: std 4, -8(1)
+; FORWARD-NEXT: ld 4, -8(1)
+; FORWARD-NEXT: lvx 2, 0, 3
+; FORWARD-NEXT: addi 4, 4, 15
+; FORWARD-NEXT: rldicr 3, 4, 0, 59
+; FORWARD-NEXT: addi 4, 3, 16
+; FORWARD-NEXT: std 4, -8(1)
+; FORWARD-NEXT: lvx 3, 0, 3
+; FORWARD-NEXT: blr
%args = alloca ptr, align 4
%x = va_arg ptr %args, <8 x i32>
ret <8 x i32> %x
diff --git a/llvm/test/CodeGen/PowerPC/sms-regpress.mir b/llvm/test/CodeGen/PowerPC/sms-regpress.mir
index cebd78a..b01115c 100644
--- a/llvm/test/CodeGen/PowerPC/sms-regpress.mir
+++ b/llvm/test/CodeGen/PowerPC/sms-regpress.mir
@@ -1,41 +1,30 @@
-# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s
+# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s
# REQUIRES: asserts
# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues.
# The specific value of II is not important.
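+# The exact II values depend on the pipeliner's heuristics and the target's
+# scheduling model, so the patterns below only pin down the sequence of
+# events: at least one candidate II is rejected for register pressure before
+# a schedule is eventually found.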
-# CHECK: Try to schedule with 21
-# CHECK: Can't schedule
-# CHECK: Try to schedule with 22
-# CHECK: Can't schedule
-# CHECK: Try to schedule with 23
-# CHECK: Rejected the schedule because of too high register pressure
-# CHECK: Try to schedule with 24
-# CHECK: Rejected the schedule because of too high register pressure
-# CHECK: Try to schedule with 25
-# CHECK: Rejected the schedule because of too high register pressure
-# CHECK: Try to schedule with 26
-# CHECK: Schedule Found? 1 (II=26)
+# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
+# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}}
+# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}}
+# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}}
--- |
- ; ModuleID = 'a.ll'
- source_filename = "a.c"
target datalayout = "e-m:e-Fn32-i64:64-n32:64"
target triple = "ppc64le"
- ; Function Attrs: nofree nosync nounwind memory(argmem: read) uwtable
- define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 {
+ define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr {
entry:
- %0 = load double, ptr %a, align 8, !tbaa !3
- %arrayidx1 = getelementptr inbounds double, ptr %a, i64 1
- %1 = load double, ptr %arrayidx1, align 8, !tbaa !3
+ %0 = load double, ptr %a, align 8
+ %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8
+ %1 = load double, ptr %arrayidx1, align 8
%cmp163 = icmp sgt i32 %n, 0
br i1 %cmp163, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry
- %wide.trip.count = zext i32 %n to i64
- %scevgep1 = getelementptr i8, ptr %b, i64 -8
+ %wide.trip.count = zext nneg i32 %n to i64
+ %scevgep167 = getelementptr i8, ptr %b, i64 -8
call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count)
br label %for.body
@@ -43,11 +32,11 @@
%res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %30, %for.body ]
ret double %res.0.lcssa
- for.body: ; preds = %for.body, %for.body.preheader
+ for.body: ; preds = %for.body.preheader, %for.body
%res.0165 = phi double [ 0.000000e+00, %for.body.preheader ], [ %30, %for.body ]
- %2 = phi ptr [ %scevgep1, %for.body.preheader ], [ %3, %for.body ]
+ %2 = phi ptr [ %scevgep167, %for.body.preheader ], [ %3, %for.body ]
%3 = getelementptr i8, ptr %2, i64 8
- %4 = load double, ptr %3, align 8, !tbaa !3
+ %4 = load double, ptr %3, align 8
%5 = tail call double @llvm.fmuladd.f64(double %0, double %4, double %0)
%6 = tail call double @llvm.fmuladd.f64(double %5, double %4, double %5)
%7 = tail call double @llvm.fmuladd.f64(double %6, double %4, double %6)
@@ -92,152 +81,23 @@
%mul66 = fmul double %12, %mul65
%30 = tail call double @llvm.fmuladd.f64(double %mul66, double %10, double %res.0165)
%31 = call i1 @llvm.loop.decrement.i64(i64 1)
- br i1 %31, label %for.body, label %for.cond.cleanup, !llvm.loop !7
+ br i1 %31, label %for.body, label %for.cond.cleanup
}
- ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
- declare double @llvm.fmuladd.f64(double, double, double) #1
+ declare double @llvm.fmuladd.f64(double, double, double)
- ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
- declare void @llvm.set.loop.iterations.i64(i64) #2
+ declare void @llvm.set.loop.iterations.i64(i64)
- ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
- declare i1 @llvm.loop.decrement.i64(i64) #2
+ declare i1 @llvm.loop.decrement.i64(i64)
- attributes #0 = { nofree nosync nounwind memory(argmem: read) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-privileged,-rop-protect,-spe" }
- attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
- attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn }
-
- !llvm.module.flags = !{!0, !1}
- !llvm.ident = !{!2}
-
- !0 = !{i32 1, !"wchar_size", i32 4}
- !1 = !{i32 7, !"uwtable", i32 2}
- !2 = !{!"clang version 18.0.0 (https://miratech-soft@dev.azure.com/miratech-soft/llvm/_git/llvm c8d01fb665fc5d9378100a6d92ebcd3be49be655)"}
- !3 = !{!4, !4, i64 0}
- !4 = !{!"double", !5, i64 0}
- !5 = !{!"omnipotent char", !6, i64 0}
- !6 = !{!"Simple C/C++ TBAA"}
- !7 = distinct !{!7, !8, !9}
- !8 = !{!"llvm.loop.mustprogress"}
- !9 = !{!"llvm.loop.unroll.disable"}
-
...
---
name: kernel
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHCatchret: false
-hasEHScopes: false
-hasEHFunclets: false
-isOutlined: false
-debugInstrRef: false
-failsVerification: false
-tracksDebugUserValues: false
-registers:
- - { id: 0, class: vsfrc, preferred-register: '' }
- - { id: 1, class: vsfrc, preferred-register: '' }
- - { id: 2, class: g8rc, preferred-register: '' }
- - { id: 3, class: vsfrc, preferred-register: '' }
- - { id: 4, class: vsfrc, preferred-register: '' }
- - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- - { id: 6, class: g8rc, preferred-register: '' }
- - { id: 7, class: vsfrc, preferred-register: '' }
- - { id: 8, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- - { id: 10, class: g8rc, preferred-register: '' }
- - { id: 11, class: gprc, preferred-register: '' }
- - { id: 12, class: vsfrc, preferred-register: '' }
- - { id: 13, class: crrc, preferred-register: '' }
- - { id: 14, class: vsfrc, preferred-register: '' }
- - { id: 15, class: g8rc, preferred-register: '' }
- - { id: 16, class: g8rc, preferred-register: '' }
- - { id: 17, class: g8rc, preferred-register: '' }
- - { id: 18, class: f8rc, preferred-register: '' }
- - { id: 19, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- - { id: 20, class: vsfrc, preferred-register: '' }
- - { id: 21, class: vsfrc, preferred-register: '' }
- - { id: 22, class: vsfrc, preferred-register: '' }
- - { id: 23, class: vsfrc, preferred-register: '' }
- - { id: 24, class: vsfrc, preferred-register: '' }
- - { id: 25, class: vsfrc, preferred-register: '' }
- - { id: 26, class: vsfrc, preferred-register: '' }
- - { id: 27, class: vsfrc, preferred-register: '' }
- - { id: 28, class: vsfrc, preferred-register: '' }
- - { id: 29, class: vsfrc, preferred-register: '' }
- - { id: 30, class: vsfrc, preferred-register: '' }
- - { id: 31, class: vsfrc, preferred-register: '' }
- - { id: 32, class: vsfrc, preferred-register: '' }
- - { id: 33, class: vsfrc, preferred-register: '' }
- - { id: 34, class: vsfrc, preferred-register: '' }
- - { id: 35, class: vsfrc, preferred-register: '' }
- - { id: 36, class: vsfrc, preferred-register: '' }
- - { id: 37, class: vsfrc, preferred-register: '' }
- - { id: 38, class: vsfrc, preferred-register: '' }
- - { id: 39, class: vsfrc, preferred-register: '' }
- - { id: 40, class: vsfrc, preferred-register: '' }
- - { id: 41, class: vsfrc, preferred-register: '' }
- - { id: 42, class: vsfrc, preferred-register: '' }
- - { id: 43, class: vsfrc, preferred-register: '' }
- - { id: 44, class: vsfrc, preferred-register: '' }
- - { id: 45, class: vsfrc, preferred-register: '' }
- - { id: 46, class: vsfrc, preferred-register: '' }
- - { id: 47, class: vsfrc, preferred-register: '' }
- - { id: 48, class: vsfrc, preferred-register: '' }
- - { id: 49, class: vsfrc, preferred-register: '' }
- - { id: 50, class: vsfrc, preferred-register: '' }
- - { id: 51, class: vsfrc, preferred-register: '' }
- - { id: 52, class: vsfrc, preferred-register: '' }
- - { id: 53, class: vsfrc, preferred-register: '' }
- - { id: 54, class: vsfrc, preferred-register: '' }
- - { id: 55, class: vsfrc, preferred-register: '' }
- - { id: 56, class: vsfrc, preferred-register: '' }
- - { id: 57, class: vsfrc, preferred-register: '' }
- - { id: 58, class: vsfrc, preferred-register: '' }
- - { id: 59, class: vsfrc, preferred-register: '' }
- - { id: 60, class: vsfrc, preferred-register: '' }
- - { id: 61, class: vsfrc, preferred-register: '' }
- - { id: 62, class: crbitrc, preferred-register: '' }
liveins:
- { reg: '$x3', virtual-reg: '%8' }
- { reg: '$x4', virtual-reg: '%9' }
- { reg: '$x5', virtual-reg: '%10' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack: []
-stack: []
-entry_values: []
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.2(0x50000000), %bb.1(0x30000000)
@@ -251,16 +111,12 @@ body: |
BCC 44, killed %13, %bb.2
bb.1:
- successors: %bb.3(0x80000000)
-
%12:vsfrc = XXLXORdpz
B %bb.3
bb.2.for.body.preheader:
- successors: %bb.4(0x80000000)
-
- %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a, !tbaa !3)
- %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1, !tbaa !3)
+ %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a)
+ %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1)
%16:g8rc = IMPLICIT_DEF
%15:g8rc = INSERT_SUBREG killed %16, killed %11, %subreg.sub_32
%17:g8rc = RLDICL killed %15, 0, 32
@@ -279,7 +135,7 @@ body: |
%4:vsfrc = PHI %14, %bb.2, %7, %bb.4
%5:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %6, %bb.4
- %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3, !tbaa !3)
+ %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3)
%6:g8rc = COPY killed %19
%20:vsfrc = nofpexcept XSMADDADP %0, %0, %18, implicit $rm
%21:vsfrc = nofpexcept XSMADDADP %20, %20, %18, implicit $rm
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir
new file mode 100644
index 0000000..eda1180
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir
@@ -0,0 +1,902 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir \
+# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir \
+# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s
+
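+# G_ANYEXT of scalable vectors is selected to the vzext.vf* pseudos checked
+# below; any-extend leaves the high bits undefined, so a zero-extend is a
+# valid way to implement it.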
+---
+name: anyext_nxv1i16_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s16>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i32_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i64_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i16_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s16>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i32_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i16_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s16>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv4i32_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i16_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s16>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv8i32_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv16i16_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ %1:vrb(<vscale x 16 x s16>) = G_ANYEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv16i32_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m4
+ %1:vrb(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv32i16_nxv32i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ %1:vrb(<vscale x 32 x s16>) = G_ANYEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv1i32_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i64_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i32_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i32_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i32_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv16i32_nxv16i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ %1:vrb(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv1i64_nxv1i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s32>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s32>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ %1:vrb(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ %1:vrb(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir
new file mode 100644
index 0000000..df0d48a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir
@@ -0,0 +1,534 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s
+
+# Don't test i1 element types here; they are widened to i8 during legalization.
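+# The tests below cycle through the integer predicates (eq, ne, ult, ule, ugt,
+# uge, slt, sle, sgt, sge) across element widths and LMULs, selecting one of
+# the vmseq/vmsne/vmslt[u]/vmsle[u] pseudos.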
+
+---
+name: icmp_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i8
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i8
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(ult), %0(<vscale x 1 x s8>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i8
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i8
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(slt), %0(<vscale x 2 x s8>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i8
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i8
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(uge), %0(<vscale x 4 x s8>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i8
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i8
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sge), %0(<vscale x 8 x s8>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
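+# From LMUL 2 upwards the vr mask result is marked early-clobber, which keeps
+# it from being allocated inside the source register group.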
+---
+name: icmp_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 16 x s8>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv32i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv32i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv32i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv64i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv64i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv64i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 64 x s1>) = G_ICMP intpred(ule), %0(<vscale x 64 x s8>), %0
+ $v8 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i16
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i16
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sle), %0(<vscale x 1 x s16>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i16
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i16
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s16>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i16
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i16
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(eq), %0(<vscale x 4 x s16>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s16>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(slt), %0(<vscale x 16 x s16>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv32i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv32i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv32i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(uge), %0(<vscale x 32 x s16>), %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i32
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i32
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sge), %0(<vscale x 1 x s32>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i32
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i32
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 2 x s32>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ule), %0(<vscale x 8 x s32>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sle), %0(<vscale x 16 x s32>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i64
+ ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i64
+ ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]]
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 1 x s1>) = G_ICMP intpred(eq), %0(<vscale x 1 x s64>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s64>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(ult), %0(<vscale x 4 x s64>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s64>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir
new file mode 100644
index 0000000..382166f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir
@@ -0,0 +1,900 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s
+
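+# G_SEXT is selected to a vsext.vf2/vf4/vf8 pseudo according to the ratio of
+# destination to source element width; the passthru operand is IMPLICIT_DEF
+# and the result is early-clobber.
+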
+---
+name: sext_nxv1i16_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s16>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i32_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i64_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i16_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s16>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i32_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i16_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s16>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv4i32_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i16_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s16>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv8i32_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv16i16_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ %1:vrb(<vscale x 16 x s16>) = G_SEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv16i32_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ %1:vrb(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv32i16_nxv32i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ %1:vrb(<vscale x 32 x s16>) = G_SEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv1i32_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i64_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i32_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i32_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i32_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv16i32_nxv16i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ %1:vrb(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv1i64_nxv1i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s32>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s32>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ %1:vrb(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ %1:vrb(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir
new file mode 100644
index 0000000..2fc9e05
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir
@@ -0,0 +1,900 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s
+
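+# G_ZEXT is selected to a vzext.vf2/vf4/vf8 pseudo according to the ratio of
+# destination to source element width; the passthru operand is IMPLICIT_DEF
+# and the result is early-clobber.
+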
+---
+name: zext_nxv1i16_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s16>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i32_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i64_nxv1i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s8>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i16_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s16>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i32_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s8>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i16_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s16>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv4i32_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s8>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i16_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s16>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv8i32_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s8>) = COPY $v8
+ %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv16i16_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ %1:vrb(<vscale x 16 x s16>) = G_ZEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv16i32_nxv16i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ %1:vrb(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv32i16_nxv32i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ %1:vrb(<vscale x 32 x s16>) = G_ZEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
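+# Zero extend from s16 element vectors. As in the s8 cases above, selection
+# picks PseudoVZEXT_VF<N>, where N is the destination-to-source element-width
+# ratio and the MF2..M8 suffix matches the register group of the result type.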
+---
+name: zext_nxv1i32_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i64_nxv1i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s16>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i32_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s16>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i32_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s16>) = COPY $v8
+ %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i32_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv16i32_nxv16i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ %1:vrb(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
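+# Zero extend from s32 element vectors. Only VF2 applies here, since s64 is
+# the only wider element type.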
+---
+name: zext_nxv1i64_nxv1i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:vrb(<vscale x 1 x s32>) = COPY $v8
+ %1:vrb(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m2 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m2 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:vrb(<vscale x 2 x s32>) = COPY $v8
+ %1:vrb(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m4 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m4 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ %1:vrb(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV32I-NEXT: $v8m8 = COPY %1
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4
+ ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+ ; RV64I-NEXT: $v8m8 = COPY %1
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ %1:vrb(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir
new file mode 100644
index 0000000..3a2d40f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir
@@ -0,0 +1,1589 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s
+
+# Extend from s1 element vectors
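+# An s1 source lives in a mask register and cannot be widened in place, so,
+# as the checks below show, the legalizer rewrites each G_ANYEXT into a
+# G_SELECT between a splat of 0 and a splat of 1. Note the XLEN differences:
+# on RV64 the s32 splat immediates are first widened to s64 with G_ANYEXT,
+# while on RV32 an s64 splat immediate is assembled from two s32 halves with
+# G_MERGE_VALUES.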
+---
+name: anyext_nxv1i8_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv1i8_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i8_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s8>) = G_ANYEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i16_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv1i16_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i16_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s16>) = G_ANYEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i32_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv1i32_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i32_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i64_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv1i64_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i64_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i8_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv2i8_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i8_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s8>) = G_ANYEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i16_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv2i16_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i16_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s16>) = G_ANYEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i32_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv2i32_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i32_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i64_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv2i64_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i64_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s1>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i8_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv4i8_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i8_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s8>) = G_ANYEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv4i16_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv4i16_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i16_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s16>) = G_ANYEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv4i32_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv4i32_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i32_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s1>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i64_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv4i64_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i64_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s1>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i8_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv8i8_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i8_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s8>) = G_ANYEXT %1(<vscale x 8 x s1>)
+ $v8 = COPY %0(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv8i16_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv8i16_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i16_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s16>) = G_ANYEXT %1(<vscale x 8 x s1>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv8i32_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv8i32_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i32_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s1>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i64_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv8i64_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i64_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s1>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv16i8_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv16i8_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i8_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s8>) = G_ANYEXT %1(<vscale x 16 x s1>)
+ $v8m2 = COPY %0(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv16i16_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv16i16_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i16_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s16>) = G_ANYEXT %1(<vscale x 16 x s1>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv16i32_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv16i32_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i32_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s1>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv32i8_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv32i8_nxv32i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv32i8_nxv32i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 32 x s1>) = COPY $v0
+ %0:_(<vscale x 32 x s8>) = G_ANYEXT %1(<vscale x 32 x s1>)
+ $v8m4 = COPY %0(<vscale x 32 x s8>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv32i16_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv32i16_nxv32i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv32i16_nxv32i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s1>) = COPY $v0
+ %0:_(<vscale x 32 x s16>) = G_ANYEXT %1(<vscale x 32 x s1>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv64i8_nxv64i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: anyext_nxv64i8_nxv64i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv64i8_nxv64i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 64 x s1>) = COPY $v0
+ %0:_(<vscale x 64 x s8>) = G_ANYEXT %1(<vscale x 64 x s1>)
+ $v8m8 = COPY %0(<vscale x 64 x s8>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s8 element vectors
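+# (The tests below extend from already-legal integer element types, so the
+# legalizer is expected to leave the G_ANYEXT itself unchanged; contrast the
+# s1 sources above, which are lowered to a G_SELECT between splats of 1 and 0.)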
+---
+name: anyext_nxv1i16_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s16>) = G_ANYEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i32_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i64_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i16_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s16>) = G_ANYEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i32_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i64_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s8>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i16_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s16>) = G_ANYEXT %1(<vscale x 4 x s8>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv4i32_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s8>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i64_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s8>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i16_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s16>) = G_ANYEXT %1(<vscale x 8 x s8>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv8i32_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s8>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i64_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s8>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv16i16_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m2
+ ; RV32-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV32: liveins: $v8m2
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV64: liveins: $v8m2
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s8>) = COPY $v8m2
+ %0:_(<vscale x 16 x s16>) = G_ANYEXT %1(<vscale x 16 x s8>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv16i32_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m4
+ ; RV32-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV32: liveins: $v8m4
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m4
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV64: liveins: $v8m4
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m4
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s8>) = COPY $v8m4
+ %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s8>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv32i16_nxv32i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m4
+ ; RV32-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV32: liveins: $v8m4
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV64: liveins: $v8m4
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s8>) = COPY $v8m4
+ %0:_(<vscale x 32 x s16>) = G_ANYEXT %1(<vscale x 32 x s8>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s16 element vectors
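+# (As with the s8 sources, extending from s16 elements is already legal, so
+# the G_ANYEXT should pass through the legalizer unchanged.)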
+---
+name: anyext_nxv1i32_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_ANYEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv1i64_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i32_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_ANYEXT %1(<vscale x 2 x s16>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i64_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s16>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i32_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_ANYEXT %1(<vscale x 4 x s16>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i64_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s16>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i32_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m2
+ ; RV32-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV32: liveins: $v8m2
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV64: liveins: $v8m2
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s16>) = COPY $v8m2
+ %0:_(<vscale x 8 x s32>) = G_ANYEXT %1(<vscale x 8 x s16>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i64_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m2
+ ; RV32-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV32: liveins: $v8m2
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV64: liveins: $v8m2
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s16>) = COPY $v8m2
+ %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s16>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: anyext_nxv16i32_nxv16i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m4
+ ; RV32-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV32: liveins: $v8m4
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV64: liveins: $v8m4
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s16>) = COPY $v8m4
+ %0:_(<vscale x 16 x s32>) = G_ANYEXT %1(<vscale x 16 x s16>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s32 element vectors
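+# (Widening s32 elements to s64 is likewise legal; the G_ANYEXT is expected
+# to survive unchanged here as well.)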
+---
+name: anyext_nxv1i64_nxv1i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s32>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ANYEXT %1(<vscale x 1 x s32>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: anyext_nxv2i64_nxv2i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s32>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ANYEXT %1(<vscale x 2 x s32>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: anyext_nxv4i64_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m2
+ ; RV32-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV32: liveins: $v8m2
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV64: liveins: $v8m2
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s32>) = COPY $v8m2
+ %0:_(<vscale x 4 x s64>) = G_ANYEXT %1(<vscale x 4 x s32>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: anyext_nxv8i64_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8m4
+ ; RV32-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV32: liveins: $v8m4
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV64: liveins: $v8m4
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s32>) = COPY $v8m4
+ %0:_(<vscale x 8 x s64>) = G_ANYEXT %1(<vscale x 8 x s32>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir
new file mode 100644
index 0000000..d1df954
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir
@@ -0,0 +1,810 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s
+
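+# G_ICMP on vectors with s1 (mask) elements is not legal directly: the
+# legalizer first widens each s1 operand to an s8 vector via a G_SELECT
+# between splats of -1 and 0, then compares the widened values. For vectors
+# with already-legal integer elements, the G_ICMP is left unchanged.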
+---
+name: icmp_nxv1i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv1i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 1 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv1i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[DEF]](<vscale x 1 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 1 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv2i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv2i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 2 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv2i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 2 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv4i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv4i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 4 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv4i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 4 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv8i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv8i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 8 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv8i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 8 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv16i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv16i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 16 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv16i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[DEF]](<vscale x 16 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 16 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv32i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv32i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 32 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv32i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[DEF]](<vscale x 32 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 32 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv64i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv64i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 64 x s8>), [[SELECT1]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv64i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[SELECT]](<vscale x 64 x s8>), [[SELECT1]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv1i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv1i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv2i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv2i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv4i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv4i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv8i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv8i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv16i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv16i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv32i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv32i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv64i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv64i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv64i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv1i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv1i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv2i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv2i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv4i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv4i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv8i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv8i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv16i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv16i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv32i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv32i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv32i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv1i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv1i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv2i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv2i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv4i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv4i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv4i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv8i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv8i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv16i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv16i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv16i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv1i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv1i64
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv1i64
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv2i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv2i64
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv2i64
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv4i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv4i64
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv4i64
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: icmp_nxv8i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: icmp_nxv8i64
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: icmp_nxv8i64
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0, %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir
new file mode 100644
index 0000000..1571daf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir
@@ -0,0 +1,1589 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s
+
+# Extend from s1 element vectors
+---
+name: sext_nxv1i8_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv1i8_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i8_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s8>) = G_SEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i16_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv1i16_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i16_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s16>) = G_SEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i32_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv1i32_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i32_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i64_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv1i64_nxv1i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i64_nxv1i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v0
+ %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i8_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv2i8_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i8_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s8>) = G_SEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i16_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv2i16_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i16_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s16>) = G_SEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i32_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv2i32_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i32_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i64_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv2i64_nxv2i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv2i64_nxv2i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s1>) = COPY $v0
+ %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s1>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i8_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv4i8_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv4i8_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s8>) = G_SEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv4i16_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv4i16_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv4i16_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s16>) = G_SEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv4i32_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv4i32_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv4i32_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s1>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i64_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv4i64_nxv4i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv4i64_nxv4i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s1>) = COPY $v0
+ %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s1>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i8_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv8i8_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv8i8_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s8>) = G_SEXT %1(<vscale x 8 x s1>)
+ $v8 = COPY %0(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv8i16_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv8i16_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv8i16_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s16>) = G_SEXT %1(<vscale x 8 x s1>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv8i32_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv8i32_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv8i32_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s1>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i64_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv8i64_nxv8i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv8i64_nxv8i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s1>) = COPY $v0
+ %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s1>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv16i8_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv16i8_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv16i8_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s8>) = G_SEXT %1(<vscale x 16 x s1>)
+ $v8m2 = COPY %0(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv16i16_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv16i16_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv16i16_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s16>) = G_SEXT %1(<vscale x 16 x s1>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv16i32_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv16i32_nxv16i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv16i32_nxv16i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s1>) = COPY $v0
+ %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s1>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv32i8_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv32i8_nxv32i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv32i8_nxv32i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 32 x s1>) = COPY $v0
+ %0:_(<vscale x 32 x s8>) = G_SEXT %1(<vscale x 32 x s1>)
+ $v8m4 = COPY %0(<vscale x 32 x s8>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv32i16_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv32i16_nxv32i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv32i16_nxv32i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s1>) = COPY $v0
+ %0:_(<vscale x 32 x s16>) = G_SEXT %1(<vscale x 32 x s1>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv64i8_nxv64i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v0
+ ; RV32-LABEL: name: sext_nxv64i8_nxv64i1
+ ; RV32: liveins: $v0
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv64i8_nxv64i1
+ ; RV64: liveins: $v0
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 64 x s1>) = COPY $v0
+ %0:_(<vscale x 64 x s8>) = G_SEXT %1(<vscale x 64 x s1>)
+ $v8m8 = COPY %0(<vscale x 64 x s8>)
+ PseudoRET implicit $v8m8
+...
+
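+# Extends from s8, s16, and s32 element vectors are already legal for these
+# scalable types, so, unlike the i1 cases above (which lower to a G_SELECT
+# between splats of -1 and 0), every G_SEXT below survives legalization
+# unchanged on both RV32 and RV64.
+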
+# Extend from s8 element vectors
+---
+name: sext_nxv1i16_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s16>) = G_SEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i32_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i64_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i16_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s16>) = G_SEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i32_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i64_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s8>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i16_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s16>) = G_SEXT %1(<vscale x 4 x s8>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv4i32_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s8>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i64_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s8>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i16_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s16>) = G_SEXT %1(<vscale x 8 x s8>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv8i32_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s8>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i64_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s8>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv16i16_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s8>) = COPY $v8m2
+ %0:_(<vscale x 16 x s16>) = G_SEXT %1(<vscale x 16 x s8>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv16i32_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s8>) = COPY $v8m2
+ %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s8>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv32i16_nxv32i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s8>) = COPY $v8m4
+ %0:_(<vscale x 32 x s16>) = G_SEXT %1(<vscale x 32 x s8>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s16 element vectors
+---
+name: sext_nxv1i32_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_SEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv1i64_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i32_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_SEXT %1(<vscale x 2 x s16>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i64_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s16>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i32_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_SEXT %1(<vscale x 4 x s16>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i64_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s16>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i32_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s16>) = COPY $v8m2
+ %0:_(<vscale x 8 x s32>) = G_SEXT %1(<vscale x 8 x s16>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i64_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s16>) = COPY $v8m2
+ %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s16>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: sext_nxv16i32_nxv16i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s16>) = COPY $v8m4
+ %0:_(<vscale x 16 x s32>) = G_SEXT %1(<vscale x 16 x s16>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s32 element vectors
+---
+name: sext_nxv1i64_nxv1i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s32>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_SEXT %1(<vscale x 1 x s32>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: sext_nxv2i64_nxv2i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s32>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_SEXT %1(<vscale x 2 x s32>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: sext_nxv4i64_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s32>) = COPY $v8m2
+ %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s32>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i64_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s32>) = COPY $v8m4
+ %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s32>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir
new file mode 100644
index 0000000..109536a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir
@@ -0,0 +1,701 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
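+# Each i1 element count is covered by three tests: _0 splats constant false
+# (the copy reads the G_VMCLR_VL result), _1 splats constant true (G_VMSET_VL),
+# and _2 splats a variable i1, which legalizes to an s8 splat of the masked
+# bit compared not-equal against a zero splat. Non-i1 splats keep their
+# G_SPLAT_VECTOR, taking the constant as an s32 scalar.
+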
+---
+name: splatvector_nxv1i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv1i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv1i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv1i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 1 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv2i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv2i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv2i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv2i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv4i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv4i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv4i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv4i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv8i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv8i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv8i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv8i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 8 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv16i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv16i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv16i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv16i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 16 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv32i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv32i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv32i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv32i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv32i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv32i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 32 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv64i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv64i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv64i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv64i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv64i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv64i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[AND1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 64 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s32) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s32)
+ %2:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+...
+
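+# Splats of non-i1 element types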
+---
+name: splatvector_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+
+...
+
+---
+name: splatvector_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8m2 = COPY %2(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8m2 = COPY %2(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv16i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8m4 = COPY %2(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv1i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %2(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %2(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m2 = COPY %2(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m4 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv16i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m8 = COPY %2(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir
new file mode 100644
index 0000000..7bf5f83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir
@@ -0,0 +1,821 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
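+# Same cases as the RV32 file, except that on RV64 every s32 scalar feeding a
+# G_SPLAT_VECTOR is first widened to s64 (XLen) with G_ANYEXT, and the variable
+# i1 source arrives as an s64 that gets truncated before the bit is masked.
+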
+---
+name: splatvector_nxv1i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv1i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv1i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv1i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 1 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 1 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 1 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv2i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv2i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv2i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv2i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 2 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 2 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 2 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv4i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv4i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv4i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv4i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 4 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv8i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv8i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv8i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv8i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 8 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 8 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv16i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv16i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv16i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv16i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 16 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 16 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 16 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 16 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv32i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv32i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv32i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv32i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv32i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv32i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 32 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 32 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 32 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 32 x s1>)
+ PseudoRET implicit $v0
+...
+---
+name: splatvector_nxv64i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv64i1_0
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMCLR_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 0
+ %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv64i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv64i1_1
+ ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VMSET_VL $x0
+ ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s1) = G_CONSTANT i1 1
+ %1:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %0(s1)
+ $v0 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv64i1_2
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x10
+
+ ; CHECK-LABEL: name: splatvector_nxv64i1_2
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 64 x s1>) = G_ICMP intpred(ne), [[SPLAT_VECTOR]](<vscale x 64 x s8>), [[SPLAT_VECTOR1]]
+ ; CHECK-NEXT: $v0 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v0
+ %0:_(s64) = COPY $x10
+ %1:_(s1) = G_TRUNC %0(s64)
+ %2:_(<vscale x 64 x s1>) = G_SPLAT_VECTOR %1(s1)
+ $v0 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v0
+...
+
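+# Splats of s8/s16/s32 elements: per the checks below, the scalar constant is
+# any-extended to s64 before feeding G_SPLAT_VECTOR, while an s64 constant is
+# splatted directly.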
+---
+name: splatvector_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+
+...
+
+---
+name: splatvector_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8 = COPY %2(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i8
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ %1:_(s8) = G_CONSTANT i8 0
+ %2:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR %1(s8)
+ $v8m2 = COPY %2(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8m2 = COPY %2(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv16i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i16
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16) = G_CONSTANT i16 0
+ %2:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR %1(s16)
+ $v8m4 = COPY %2(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv1i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %2(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %2(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m2 = COPY %2(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m4 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv16i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv16i32
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 16 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m8 = COPY %2(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: splatvector_nxv1i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv1i64
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8 = COPY %2(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv2i64
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m2 = COPY %2(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv4i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv4i64
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m4 = COPY %2(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv8i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splatvector_nxv8i64
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m8 = COPY %2(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir
new file mode 100644
index 0000000..806c9b9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir
@@ -0,0 +1,121 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=HasF64 %s
+# RUN: llc -mtriple=riscv32 -mattr=+Zve64x -run-pass=legalizer %s -o - | FileCheck --check-prefix=NoF64 %s
+
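+# As the checks below show, splatting an s64 scalar on rv32 depends on FP64
+# support: with +v (HasF64) the s64 G_SPLAT_VECTOR stays legal and is fed by a
+# G_MERGE_VALUES of the two s32 halves, while with +Zve64x (NoF64) it is
+# lowered to G_SPLAT_VECTOR_SPLIT_I64_VL, which consumes the two s32 halves
+# and a $x0 VL operand directly.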
+---
+name: splatvector_nxv1i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; HasF64-LABEL: name: splatvector_nxv1i64
+ ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+ ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; HasF64-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s64>)
+ ; HasF64-NEXT: PseudoRET implicit $v8
+ ;
+ ; NoF64-LABEL: name: splatvector_nxv1i64
+ ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+ ; NoF64-NEXT: $v8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 1 x s64>)
+ ; NoF64-NEXT: PseudoRET implicit $v8
+ %0:_(s64) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR %0(s64)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; HasF64-LABEL: name: splatvector_nxv2i64
+ ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+ ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; HasF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s64>)
+ ; HasF64-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; NoF64-LABEL: name: splatvector_nxv2i64
+ ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+ ; NoF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 2 x s64>)
+ ; NoF64-NEXT: PseudoRET implicit $v8m2
+ %0:_(s64) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %0(s64)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv4i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; HasF64-LABEL: name: splatvector_nxv4i64
+ ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+ ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; HasF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s64>)
+ ; HasF64-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; NoF64-LABEL: name: splatvector_nxv4i64
+ ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+ ; NoF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 4 x s64>)
+ ; NoF64-NEXT: PseudoRET implicit $v8m4
+ %0:_(s64) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR %0(s64)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv8i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.1:
+ ; HasF64-LABEL: name: splatvector_nxv8i64
+ ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+ ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; HasF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s64>)
+ ; HasF64-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; NoF64-LABEL: name: splatvector_nxv8i64
+ ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+ ; NoF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 8 x s64>)
+ ; NoF64-NEXT: PseudoRET implicit $v8m8
+ %0:_(s64) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR %0(s64)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir
index 4de02b1..8a34521 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir
@@ -9,8 +9,8 @@ body: |
; CHECK-LABEL: name: test_nxv1i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 1 x s8>) = COPY $v8
%1:_(<vscale x 1 x s8>) = COPY $v9
@@ -27,8 +27,8 @@ body: |
; CHECK-LABEL: name: test_nxv2i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s8>) = COPY $v8
%1:_(<vscale x 2 x s8>) = COPY $v9
@@ -45,8 +45,8 @@ body: |
; CHECK-LABEL: name: test_nxv4i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 4 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 4 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s8>) = COPY $v8
%1:_(<vscale x 4 x s8>) = COPY $v9
@@ -63,8 +63,8 @@ body: |
; CHECK-LABEL: name: test_nxv8i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 8 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 8 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s8>) = COPY $v8
%1:_(<vscale x 8 x s8>) = COPY $v9
@@ -81,8 +81,8 @@ body: |
; CHECK-LABEL: name: test_nxv16i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v10m2
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 16 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 16 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8m2
%0:_(<vscale x 16 x s8>) = COPY $v8m2
%1:_(<vscale x 16 x s8>) = COPY $v10m2
@@ -99,8 +99,8 @@ body: |
; CHECK-LABEL: name: test_nxv32i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v12m4
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 32 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 32 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8m4
%0:_(<vscale x 32 x s8>) = COPY $v8m4
%1:_(<vscale x 32 x s8>) = COPY $v12m4
@@ -117,8 +117,8 @@ body: |
; CHECK-LABEL: name: test_nxv64i8
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 64 x s8>) = COPY $v8m8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 64 x s8>) = COPY $v16m8
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 64 x s8>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 64 x s8>)
; CHECK-NEXT: PseudoRET implicit $v8m8
%0:_(<vscale x 64 x s8>) = COPY $v8m8
%1:_(<vscale x 64 x s8>) = COPY $v16m8
@@ -135,8 +135,8 @@ body: |
; CHECK-LABEL: name: test_nxv1i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 1 x s16>) = COPY $v8
%1:_(<vscale x 1 x s16>) = COPY $v9
@@ -153,8 +153,8 @@ body: |
; CHECK-LABEL: name: test_nxv2i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s16>) = COPY $v8
%1:_(<vscale x 2 x s16>) = COPY $v9
@@ -171,8 +171,8 @@ body: |
; CHECK-LABEL: name: test_nxv4i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 4 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s16>) = COPY $v8
%1:_(<vscale x 4 x s16>) = COPY $v9
@@ -189,8 +189,8 @@ body: |
; CHECK-LABEL: name: test_nxv8i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v10m2
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 8 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 8 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8m2
%0:_(<vscale x 8 x s16>) = COPY $v8m2
%1:_(<vscale x 8 x s16>) = COPY $v10m2
@@ -207,8 +207,8 @@ body: |
; CHECK-LABEL: name: test_nxv16i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v12m4
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 16 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 16 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8m4
%0:_(<vscale x 16 x s16>) = COPY $v8m4
%1:_(<vscale x 16 x s16>) = COPY $v12m4
@@ -225,8 +225,8 @@ body: |
; CHECK-LABEL: name: test_nxv32i16
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 32 x s16>) = COPY $v8m8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 32 x s16>) = COPY $v16m8
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 32 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 32 x s16>)
; CHECK-NEXT: PseudoRET implicit $v8m8
%0:_(<vscale x 32 x s16>) = COPY $v8m8
%1:_(<vscale x 32 x s16>) = COPY $v16m8
@@ -243,8 +243,8 @@ body: |
; CHECK-LABEL: name: test_nxv1i32
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s32>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 1 x s32>) = COPY $v8
%1:_(<vscale x 1 x s32>) = COPY $v9
@@ -261,8 +261,8 @@ body: |
; CHECK-LABEL: name: test_nxv2i32
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 2 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 2 x s32>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s32>) = COPY $v8
%1:_(<vscale x 2 x s32>) = COPY $v9
@@ -279,8 +279,8 @@ body: |
; CHECK-LABEL: name: test_nxv4i32
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v10m2
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 4 x s32>)
; CHECK-NEXT: PseudoRET implicit $v8m2
%0:_(<vscale x 4 x s32>) = COPY $v8m2
%1:_(<vscale x 4 x s32>) = COPY $v10m2
@@ -297,8 +297,8 @@ body: |
; CHECK-LABEL: name: test_nxv8i32
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v12m4
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 8 x s32>)
; CHECK-NEXT: PseudoRET implicit $v8m4
%0:_(<vscale x 8 x s32>) = COPY $v8m4
%1:_(<vscale x 8 x s32>) = COPY $v12m4
@@ -315,8 +315,8 @@ body: |
; CHECK-LABEL: name: test_nxv16i32
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 16 x s32>) = COPY $v8m8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s32>) = COPY $v16m8
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 16 x s32>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 16 x s32>)
; CHECK-NEXT: PseudoRET implicit $v8m8
%0:_(<vscale x 16 x s32>) = COPY $v8m8
%1:_(<vscale x 16 x s32>) = COPY $v16m8
@@ -333,8 +333,8 @@ body: |
; CHECK-LABEL: name: test_nxv1i64
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 1 x s64>) = COPY $v8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 1 x s64>) = COPY $v9
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8 = COPY [[OR]](<vscale x 1 x s64>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8 = COPY [[XOR]](<vscale x 1 x s64>)
; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 1 x s64>) = COPY $v8
%1:_(<vscale x 1 x s64>) = COPY $v9
@@ -351,8 +351,8 @@ body: |
; CHECK-LABEL: name: test_nxv2i64
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $v8m2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $v10m2
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m2 = COPY [[OR]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m2 = COPY [[XOR]](<vscale x 2 x s64>)
; CHECK-NEXT: PseudoRET implicit $v8m2
%0:_(<vscale x 2 x s64>) = COPY $v8m2
%1:_(<vscale x 2 x s64>) = COPY $v10m2
@@ -369,8 +369,8 @@ body: |
; CHECK-LABEL: name: test_nxv4i64
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 4 x s64>) = COPY $v8m4
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s64>) = COPY $v12m4
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m4 = COPY [[OR]](<vscale x 4 x s64>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m4 = COPY [[XOR]](<vscale x 4 x s64>)
; CHECK-NEXT: PseudoRET implicit $v8m4
%0:_(<vscale x 4 x s64>) = COPY $v8m4
%1:_(<vscale x 4 x s64>) = COPY $v12m4
@@ -387,8 +387,8 @@ body: |
; CHECK-LABEL: name: test_nxv8i64
; CHECK: [[COPY:%[0-9]+]]:_(<vscale x 8 x s64>) = COPY $v8m8
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s64>) = COPY $v16m8
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_XOR [[COPY]], [[COPY1]]
- ; CHECK-NEXT: $v8m8 = COPY [[OR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_XOR [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $v8m8 = COPY [[XOR]](<vscale x 8 x s64>)
; CHECK-NEXT: PseudoRET implicit $v8m8
%0:_(<vscale x 8 x s64>) = COPY $v8m8
%1:_(<vscale x 8 x s64>) = COPY $v16m8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir
new file mode 100644
index 0000000..fe4ddfa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir
@@ -0,0 +1,1594 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s
+
+# Extend from s1 element vectors
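+# Per the checks below, G_ZEXT of an s1 (mask) vector is legalized into a
+# G_SELECT between splats of the two possible element values: splat(1) where
+# the mask bit is set and splat(0) elsewhere. On rv64 the s32 constants are
+# any-extended to s64 first; on rv32 an s64 element type builds its scalar
+# with G_MERGE_VALUES of two s32 halves.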
+---
+name: zext_nxv1i8_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i8_nxv1i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i8_nxv1i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s8>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v8
+ %0:_(<vscale x 1 x s8>) = G_ZEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i16_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i16_nxv1i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i16_nxv1i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v8
+ %0:_(<vscale x 1 x s16>) = G_ZEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i32_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i32_nxv1i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i32_nxv1i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i64_nxv1i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i64_nxv1i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i64_nxv1i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SELECT [[COPY]](<vscale x 1 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s1>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s1>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i8_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i8_nxv2i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i8_nxv2i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v8
+ %0:_(<vscale x 2 x s8>) = G_ZEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i16_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i16_nxv2i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i16_nxv2i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v8
+ %0:_(<vscale x 2 x s16>) = G_ZEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i32_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i32_nxv2i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i32_nxv2i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s1>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s1>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i64_nxv2i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i64_nxv2i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv2i64_nxv2i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SELECT [[COPY]](<vscale x 2 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s1>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s1>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i8_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i8_nxv4i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv4i8_nxv4i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v8
+ %0:_(<vscale x 4 x s8>) = G_ZEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv4i16_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i16_nxv4i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv4i16_nxv4i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s1>) = COPY $v8
+ %0:_(<vscale x 4 x s16>) = G_ZEXT %1(<vscale x 4 x s1>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv4i32_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i32_nxv4i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv4i32_nxv4i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s1>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s1>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i64_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i64_nxv4i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv4i64_nxv4i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SELECT [[COPY]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s1>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s1>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i8_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i8_nxv8i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv8i8_nxv8i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[SELECT]](<vscale x 8 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 8 x s1>) = COPY $v8
+ %0:_(<vscale x 8 x s8>) = G_ZEXT %1(<vscale x 8 x s1>)
+ $v8 = COPY %0(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv8i16_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i16_nxv8i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv8i16_nxv8i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s1>) = COPY $v8
+ %0:_(<vscale x 8 x s16>) = G_ZEXT %1(<vscale x 8 x s1>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv8i32_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i32_nxv8i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv8i32_nxv8i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s1>) = COPY $v8
+ %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s1>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i64_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i64_nxv8i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv8i64_nxv8i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[C1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SELECT [[COPY]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s1>) = COPY $v8
+ %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s1>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv16i8_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i8_nxv16i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv16i8_nxv16i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s8>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m2 = COPY [[SELECT]](<vscale x 16 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 16 x s1>) = COPY $v8
+ %0:_(<vscale x 16 x s8>) = G_ZEXT %1(<vscale x 16 x s1>)
+ $v8m2 = COPY %0(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv16i16_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i16_nxv16i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv16i16_nxv16i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s1>) = COPY $v8
+ %0:_(<vscale x 16 x s16>) = G_ZEXT %1(<vscale x 16 x s1>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv16i32_nxv16i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i32_nxv16i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv16i32_nxv16i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_SELECT [[COPY]](<vscale x 16 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s1>) = COPY $v8
+ %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s1>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv32i8_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv32i8_nxv32i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv32i8_nxv32i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s8>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m4 = COPY [[SELECT]](<vscale x 32 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 32 x s1>) = COPY $v8
+ %0:_(<vscale x 32 x s8>) = G_ZEXT %1(<vscale x 32 x s1>)
+ $v8m4 = COPY %0(<vscale x 32 x s8>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv32i16_nxv32i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv32i16_nxv32i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv32i16_nxv32i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_SELECT [[COPY]](<vscale x 32 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s1>) = COPY $v8
+ %0:_(<vscale x 32 x s16>) = G_ZEXT %1(<vscale x 32 x s1>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv64i8_nxv64i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv64i8_nxv64i1
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v8
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv64i8_nxv64i1
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 64 x s1>) = COPY $v8
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[COPY]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8m8 = COPY [[SELECT]](<vscale x 64 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 64 x s1>) = COPY $v8
+ %0:_(<vscale x 64 x s8>) = G_ZEXT %1(<vscale x 64 x s1>)
+ $v8m8 = COPY %0(<vscale x 64 x s8>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s8 element vectors
+---
+name: zext_nxv1i16_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s16>) = G_ZEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i32_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i64_nxv1i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s8>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s8>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i16_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s16>) = G_ZEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i32_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s8>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i64_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s8>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s8>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i16_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s16>) = G_ZEXT %1(<vscale x 4 x s8>)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv4i32_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s8>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i64_nxv4i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s8>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s8>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i16_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s16>) = G_ZEXT %1(<vscale x 8 x s8>)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv8i32_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s8>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i64_nxv8i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s8>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s8>) = COPY $v8
+ %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s8>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv16i16_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 16 x s8>) = COPY $v8m2
+ %0:_(<vscale x 16 x s16>) = G_ZEXT %1(<vscale x 16 x s8>)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv16i32_nxv16i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s8>) = COPY $v8m2
+ %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s8>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv32i16_nxv32i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 32 x s8>) = COPY $v8m4
+ %0:_(<vscale x 32 x s16>) = G_ZEXT %1(<vscale x 32 x s8>)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s16 element vectors
+---
+name: zext_nxv1i32_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s32>) = G_ZEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv1i64_nxv1i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s16>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s16>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i32_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s32>) = G_ZEXT %1(<vscale x 2 x s16>)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i64_nxv2i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s16>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s16>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i32_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s32>) = G_ZEXT %1(<vscale x 4 x s16>)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i64_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s16>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s16>) = COPY $v8
+ %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s16>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i32_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 8 x s16>) = COPY $v8m2
+ %0:_(<vscale x 8 x s32>) = G_ZEXT %1(<vscale x 8 x s16>)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i64_nxv8i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m4
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $v8m4
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s16>) = COPY $v8m4
+ %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s16>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
+---
+name: zext_nxv16i32_nxv16i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 16 x s16>) = COPY $v8m4
+ %0:_(<vscale x 16 x s32>) = G_ZEXT %1(<vscale x 16 x s16>)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+...
+
+# Extend from s32 element vectors
+---
+name: zext_nxv1i64_nxv1i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 1 x s32>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %1:_(<vscale x 1 x s32>) = COPY $v8
+ %0:_(<vscale x 1 x s64>) = G_ZEXT %1(<vscale x 1 x s32>)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+...
+---
+name: zext_nxv2i64_nxv2i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s32>) = COPY $v8
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m2
+ %1:_(<vscale x 2 x s32>) = COPY $v8
+ %0:_(<vscale x 2 x s64>) = G_ZEXT %1(<vscale x 2 x s32>)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+...
+---
+name: zext_nxv4i64_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m4
+ %1:_(<vscale x 4 x s32>) = COPY $v8m2
+ %0:_(<vscale x 4 x s64>) = G_ZEXT %1(<vscale x 4 x s32>)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+...
+---
+name: zext_nxv8i64_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+ ; RV32-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV32: liveins: $v8
+ ; RV32-NEXT: {{ $}}
+ ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV64: liveins: $v8
+ ; RV64-NEXT: {{ $}}
+ ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8m8
+ %1:_(<vscale x 8 x s32>) = COPY $v8m4
+ %0:_(<vscale x 8 x s64>) = G_ZEXT %1(<vscale x 8 x s32>)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir
new file mode 100644
index 0000000..062179c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir
@@ -0,0 +1,820 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV64I %s
+
+---
+name: anyext_nxv1i16_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s16>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i32_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i64_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i16_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s16>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i32_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i16_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s16>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv4i32_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i16_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s16>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv8i32_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv16i16_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 16 x s8>) = COPY $v8m2
+ %1:_(<vscale x 16 x s16>) = G_ANYEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv16i32_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m4
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m4
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s8>) = COPY $v8m4
+ %1:_(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv32i16_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ANYEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 32 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 32 x s8>) = COPY $v8m4
+ %1:_(<vscale x 32 x s16>) = G_ANYEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv1i32_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_ANYEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv1i64_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i32_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i32_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_ANYEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i32_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s32>) = G_ANYEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv16i32_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ANYEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s16>) = COPY $v8m4
+ %1:_(<vscale x 16 x s32>) = G_ANYEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: anyext_nxv1i64_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ANYEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: $v8 = COPY [[ANYEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ANYEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ANYEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s32>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i64_nxv4i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ANYEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s32>) = COPY $v8m2
+ %1:_(<vscale x 4 x s64>) = G_ANYEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: anyext_nxv8i64_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ANYEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s32>) = COPY $v8m4
+ %1:_(<vscale x 8 x s64>) = G_ANYEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
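
The CHECK lines in these files are autogenerated (see the NOTE header), so when regbankselect output changes they should be refreshed with the update script rather than edited by hand. A minimal sketch, assuming a build tree at build/ (the paths and the --llc-binary value are illustrative):

    # Regenerate the autogenerated assertions for one test file.
    llvm/utils/update_mir_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir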
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir
new file mode 100644
index 0000000..925d6ae
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir
@@ -0,0 +1,675 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV64I %s
+
+---
+name: icmp_nxv1i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s1>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s1>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s1>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s1>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s1>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv32i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv32i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv32i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s1>), %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv64i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv64i1
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s1>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv64i1
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s1>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 64 x s1>), %0
+ $v8 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s8>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s8>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s8>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s8>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s8>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv32i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv32i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv64i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv64i8
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv64i8
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 64 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 64 x s8>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 64 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 64 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 64 x s8>), %0
+ $v8 = COPY %1(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s16>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s16>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s16>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s16>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s16>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv32i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv32i16
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv32i16
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 32 x s16>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 32 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 32 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s16>), %0
+ $v8 = COPY %1(<vscale x 32 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s32>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s32>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s32>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s32>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s32>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s32>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s32>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv16i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv16i32
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv16i32
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 16 x s32>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 16 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 16 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 16 x s32>), %0
+ $v8 = COPY %1(<vscale x 16 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv1i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv1i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv1i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 1 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 1 x s64>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 1 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 1 x s64>), %0
+ $v8 = COPY %1(<vscale x 1 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv2i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv2i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv2i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 2 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 2 x s64>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 2 x s64>), %0
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv4i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv4i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv4i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 4 x s64>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s64>), %0
+ $v8 = COPY %1(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: icmp_nxv8i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32I-LABEL: name: icmp_nxv8i64
+ ; RV32I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]]
+ ; RV32I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: icmp_nxv8i64
+ ; RV64I: [[DEF:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb(<vscale x 8 x s1>) = G_ICMP intpred(sgt), [[DEF]](<vscale x 8 x s64>), [[DEF]]
+ ; RV64I-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 8 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 8 x s64>), %0
+ $v8 = COPY %1(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
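
For context on the icmp.mir cases above: a scalable-vector G_ICMP with intpred(sgt) is what the GlobalISel IRTranslator emits for an ordinary vector icmp, so an IR-level reproducer would look like the sketch below (function name illustrative; the tests themselves start from hand-written generic MIR with G_IMPLICIT_DEF operands instead):

    define <vscale x 4 x i1> @icmp_sgt_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
      ; Lane-wise signed compare; the <vscale x 4 x i1> result is what
      ; regbankselect assigns to the vector register bank (vrb) above.
      %c = icmp sgt <vscale x 4 x i32> %a, %b
      ret <vscale x 4 x i1> %c
    }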
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir
new file mode 100644
index 0000000..a754b8b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir
@@ -0,0 +1,820 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV64I %s
+
+---
+name: sext_nxv1i16_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s16>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i32_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i64_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i16_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s16>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i32_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i16_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s16>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv4i32_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i16_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s16>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv8i32_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv16i16_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 16 x s8>) = COPY $v8m2
+ %1:_(<vscale x 16 x s16>) = G_SEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv16i32_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s8>) = COPY $v8m2
+ %1:_(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv32i16_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_SEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 32 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 32 x s8>) = COPY $v8m4
+ %1:_(<vscale x 32 x s16>) = G_SEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv1i32_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_SEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv1i64_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i32_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_SEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i32_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_SEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i32_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s32>) = G_SEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv16i32_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_SEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s16>) = COPY $v8m4
+ %1:_(<vscale x 16 x s32>) = G_SEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: sext_nxv1i64_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_SEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: $v8 = COPY [[SEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_SEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: sext_nxv2i64_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_SEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: $v8m2 = COPY [[SEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s32>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_SEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: sext_nxv4i64_nxv4i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s32>) = COPY $v8m2
+ %1:_(<vscale x 4 x s64>) = G_SEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: sext_nxv8i64_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s32>) = COPY $v8m4
+ %1:_(<vscale x 8 x s64>) = G_SEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir
new file mode 100644
index 0000000..c3bc4a9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir
@@ -0,0 +1,820 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV32I %s
+# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \
+# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \
+# RUN: -o - | FileCheck -check-prefix=RV64I %s
+
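+# These tests check that register bank selection assigns the vector register
+# bank (vrb) to both the source and result of G_ZEXT on scalable vector
+# types, and that the source/result copies use the LMUL-appropriate register
+# classes ($v8, $v8m2, $v8m4, $v8m8).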
+---
+name: zext_nxv1i16_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s16>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s16>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i32_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i64_nxv1i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s8>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s8>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i16_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s16>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s16>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i32_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s8>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s8>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i16_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s16>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s16>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8 = COPY %1(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv4i32_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s8>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s8>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i16_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s16>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s16>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m2 = COPY %1(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv8i32_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s8>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s8>) = COPY $v8
+ %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s8>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv16i16_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s16>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 16 x s8>) = COPY $v8m2
+ %1:_(<vscale x 16 x s16>) = G_ZEXT %0(<vscale x 16 x s8>)
+ $v8m4 = COPY %1(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv16i32_nxv16i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s8>) = COPY $v8m2
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s8>) = COPY $v8m2
+ %1:_(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s8>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv32i16_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 32 x s8>) = COPY $v8m4
+ %1:_(<vscale x 32 x s16>) = G_ZEXT %0(<vscale x 32 x s8>)
+ $v8m8 = COPY %1(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv1i32_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i64_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s16>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s16>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i32_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s16>)
+ $v8 = COPY %1(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s16>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s16>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i32_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s16>)
+ $v8m2 = COPY %1(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s16>) = COPY $v8
+ %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s16>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i32_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s16>)
+ $v8m4 = COPY %1(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s16>) = COPY $v8m2
+ %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s16>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv16i32_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 16 x s16>) = COPY $v8m4
+ %1:_(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s16>)
+ $v8m8 = COPY %1(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv1i64_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+ ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 1 x s32>) = COPY $v8
+ %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s32>)
+ $v8 = COPY %1(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m2
+ ;
+ ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+ ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m2
+ %0:_(<vscale x 2 x s32>) = COPY $v8
+ %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s32>)
+ $v8m2 = COPY %1(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m4
+ ;
+ ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s32>) = COPY $v8m2
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s32>)
+ ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m4
+ %0:_(<vscale x 4 x s32>) = COPY $v8m2
+ %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s32>)
+ $v8m4 = COPY %1(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $v8
+
+ ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV32I: liveins: $v8
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV32I-NEXT: PseudoRET implicit $v8m8
+ ;
+ ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32
+ ; RV64I: liveins: $v8
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s32>) = COPY $v8m4
+ ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s32>)
+ ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+ ; RV64I-NEXT: PseudoRET implicit $v8m8
+ %0:_(<vscale x 8 x s32>) = COPY $v8m4
+ %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s32>)
+ $v8m8 = COPY %1(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index bafa92e..65d0768 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -18,14 +18,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32-NEXT: vand.vv v8, v11, v8
+; RV32-NEXT: vmerge.vvm v8, v8, v11, v0
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: sub a0, a0, a1
-; RV32-NEXT: lui a1, 16
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: ret
;
; RV64-LABEL: ctz_nxv4i32:
@@ -41,14 +39,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64-NEXT: vand.vv v8, v11, v8
+; RV64-NEXT: vmerge.vvm v8, v8, v11, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: sub a0, a0, a1
-; RV64-NEXT: lui a1, 16
-; RV64-NEXT: addiw a1, a1, -1
-; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: slli a0, a0, 48
+; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
ret i32 %res
@@ -158,8 +154,7 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV64-NEXT: li a1, -1
; RV64-NEXT: vmadd.vx v16, a1, v8
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64-NEXT: vand.vv v8, v16, v8
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: subw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index f5305a1..83d1d1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -19,10 +19,9 @@ define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%y = sext <4 x i32> %b to <4 x i64>
@@ -41,10 +40,9 @@ define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%y = zext <4 x i32> %b to <4 x i64>
@@ -62,9 +60,9 @@ define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer
@@ -88,10 +86,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -116,10 +112,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -142,12 +136,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -170,12 +161,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -198,12 +186,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -226,12 +211,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -251,9 +233,9 @@ define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) {
;
; CHECK-ZVBB-LABEL: vwsll_vi_v4i64:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i32> %a to <4 x i64>
%z = shl <4 x i64> %x, splat (i64 2)
@@ -275,10 +257,9 @@ define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%y = sext <8 x i16> %b to <8 x i32>
@@ -297,10 +278,9 @@ define <8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%y = zext <8 x i16> %b to <8 x i32>
@@ -318,9 +298,9 @@ define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i64> poison, i64 %b, i32 0
%splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer
@@ -340,9 +320,9 @@ define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i32> poison, i32 %b, i32 0
%splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -366,10 +346,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -394,10 +372,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
@@ -420,12 +396,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -448,12 +421,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer
@@ -473,9 +443,9 @@ define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) {
;
; CHECK-ZVBB-LABEL: vwsll_vi_v8i32:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <8 x i16> %a to <8 x i32>
%z = shl <8 x i32> %x, splat (i32 2)
@@ -497,10 +467,9 @@ define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%y = sext <16 x i8> %b to <16 x i16>
@@ -519,10 +488,9 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%y = zext <16 x i8> %b to <16 x i16>
@@ -552,12 +520,9 @@ define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v12, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vnsrl.wi v8, v12, 0
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8
+; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i32> poison, i32 %b, i32 0
%splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer
@@ -577,9 +542,9 @@ define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i16> poison, i16 %b, i32 0
%splat = shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer
@@ -603,10 +568,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i8> poison, i8 %b, i32 0
%splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -631,10 +594,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i8> poison, i8 %b, i32 0
%splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
@@ -654,9 +615,9 @@ define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) {
;
; CHECK-ZVBB-LABEL: vwsll_vi_v16i16:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2
+; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%x = zext <16 x i8> %a to <16 x i16>
%z = shl <16 x i16> %x, splat (i16 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll
new file mode 100644
index 0000000..3a8d08f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+; The following binop x, (zext i1) tests will be vector-legalized into a vselect
+; of two splat_vectors, but on RV64 the splat value will be implicitly
+; truncated:
+;
+; t15: nxv2i32 = splat_vector Constant:i64<1>
+; t13: nxv2i32 = splat_vector Constant:i64<0>
+; t16: nxv2i32 = vselect t2, t15, t13
+; t7: nxv2i32 = add t4, t16
+;
+; Make sure that foldSelectWithIdentityConstant in DAGCombiner.cpp handles the
+; truncating splat, so we pull the vselect back and fold it into a mask.
+
+define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT: ret
+ %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+ %add = add <vscale x 2 x i32> %b, %zext
+ ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_add_commuted:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
+; CHECK-NEXT: ret
+ %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+ %add = add <vscale x 2 x i32> %zext, %b
+ ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_sub:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+ %sub = sub <vscale x 2 x i32> %b, %zext
+ ret <vscale x 2 x i32> %sub
+}
+
+define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: i1_zext_or:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT: vor.vi v8, v8, 1, v0.t
+; CHECK-NEXT: ret
+ %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+ %or = or <vscale x 2 x i32> %b, %zext
+ ret <vscale x 2 x i32> %or
+}
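+
+; Editorial sketch, not part of the autogenerated tests above: xor's identity
+; element is also 0, so the same select-with-identity fold should apply here,
+; and a masked vxor.vi would be the expected (unverified) lowering. No
+; FileCheck lines are attached since this output has not been regenerated.
+define <vscale x 2 x i32> @i1_zext_xor(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
+  %zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
+  %xor = xor <vscale x 2 x i32> %b, %zext
+  ret <vscale x 2 x i32> %xor
+}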
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
index e56dca0..a14ce71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll
@@ -149,49 +149,49 @@ define <vscale x 2 x i64> @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y
}
define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
-; RV32-LABEL: vwop_vscale_sext_i1i32_multiple_users:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu
-; RV32-NEXT: vlm.v v8, (a0)
-; RV32-NEXT: vlm.v v9, (a1)
-; RV32-NEXT: vlm.v v10, (a2)
-; RV32-NEXT: vmv.v.i v11, 0
-; RV32-NEXT: vmv.v.v v0, v8
-; RV32-NEXT: vmerge.vim v12, v11, -1, v0
-; RV32-NEXT: vmv.v.v v0, v9
-; RV32-NEXT: vmerge.vim v9, v11, -1, v0
-; RV32-NEXT: vmv.v.v v0, v10
-; RV32-NEXT: vmerge.vim v10, v11, -1, v0
-; RV32-NEXT: vmul.vv v9, v12, v9
-; RV32-NEXT: li a0, 1
-; RV32-NEXT: vsub.vv v11, v12, v10
-; RV32-NEXT: vmv.v.v v0, v8
-; RV32-NEXT: vsub.vx v10, v10, a0, v0.t
-; RV32-NEXT: vor.vv v8, v9, v10
-; RV32-NEXT: vor.vv v8, v8, v11
-; RV32-NEXT: ret
+; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
+; NO_FOLDING: # %bb.0:
+; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
+; NO_FOLDING-NEXT: vlm.v v8, (a0)
+; NO_FOLDING-NEXT: vlm.v v9, (a1)
+; NO_FOLDING-NEXT: vlm.v v10, (a2)
+; NO_FOLDING-NEXT: vmv.v.i v11, 0
+; NO_FOLDING-NEXT: vmv.v.v v0, v8
+; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; NO_FOLDING-NEXT: vmv.v.v v0, v9
+; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
+; NO_FOLDING-NEXT: vmv.v.v v0, v10
+; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
+; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT: li a0, 1
+; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
+; NO_FOLDING-NEXT: vmv.v.v v0, v8
+; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vor.vv v8, v8, v11
+; NO_FOLDING-NEXT: ret
;
-; RV64-LABEL: vwop_vscale_sext_i1i32_multiple_users:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; RV64-NEXT: vlm.v v8, (a0)
-; RV64-NEXT: vlm.v v9, (a1)
-; RV64-NEXT: vlm.v v10, (a2)
-; RV64-NEXT: vmv.v.i v11, 0
-; RV64-NEXT: vmv.v.v v0, v8
-; RV64-NEXT: vmerge.vim v12, v11, -1, v0
-; RV64-NEXT: vmv.v.v v0, v9
-; RV64-NEXT: vmerge.vim v9, v11, -1, v0
-; RV64-NEXT: vmv.v.v v0, v10
-; RV64-NEXT: vmerge.vim v10, v11, -1, v0
-; RV64-NEXT: vmul.vv v9, v12, v9
-; RV64-NEXT: vmv.v.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v11, 1, v0
-; RV64-NEXT: vsub.vv v8, v10, v8
-; RV64-NEXT: vsub.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v9, v8
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: ret
+; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
+; FOLDING-NEXT: vlm.v v8, (a0)
+; FOLDING-NEXT: vlm.v v9, (a1)
+; FOLDING-NEXT: vlm.v v10, (a2)
+; FOLDING-NEXT: vmv.v.i v11, 0
+; FOLDING-NEXT: vmv.v.v v0, v8
+; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0
+; FOLDING-NEXT: vmv.v.v v0, v9
+; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0
+; FOLDING-NEXT: vmv.v.v v0, v10
+; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
+; FOLDING-NEXT: vmul.vv v9, v12, v9
+; FOLDING-NEXT: li a0, 1
+; FOLDING-NEXT: vsub.vv v11, v12, v10
+; FOLDING-NEXT: vmv.v.v v0, v8
+; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
+; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vor.vv v8, v8, v11
+; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
%b = load <vscale x 2 x i1>, ptr %y
%b2 = load <vscale x 2 x i1>, ptr %z
@@ -209,7 +209,7 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y,
define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; NO_FOLDING: # %bb.0:
-; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v8, (a0)
; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vlm.v v10, (a2)
@@ -221,17 +221,17 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; NO_FOLDING-NEXT: vmv1r.v v0, v10
; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; NO_FOLDING-NEXT: vmul.vv v9, v12, v9
+; NO_FOLDING-NEXT: li a0, 1
+; NO_FOLDING-NEXT: vsub.vv v11, v12, v10
; NO_FOLDING-NEXT: vmv1r.v v0, v8
-; NO_FOLDING-NEXT: vmerge.vim v8, v11, 1, v0
-; NO_FOLDING-NEXT: vsub.vv v8, v10, v8
-; NO_FOLDING-NEXT: vsub.vv v10, v12, v10
-; NO_FOLDING-NEXT: vor.vv v8, v9, v8
-; NO_FOLDING-NEXT: vor.vv v8, v8, v10
+; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v9, v10
+; NO_FOLDING-NEXT: vor.vv v8, v8, v11
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v8, (a0)
; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vlm.v v10, (a2)
@@ -243,12 +243,12 @@ define <vscale x 2 x i8> @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p
; FOLDING-NEXT: vmv1r.v v0, v10
; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0
; FOLDING-NEXT: vmul.vv v9, v12, v9
+; FOLDING-NEXT: li a0, 1
+; FOLDING-NEXT: vsub.vv v11, v12, v10
; FOLDING-NEXT: vmv1r.v v0, v8
-; FOLDING-NEXT: vmerge.vim v8, v11, 1, v0
-; FOLDING-NEXT: vsub.vv v8, v10, v8
-; FOLDING-NEXT: vsub.vv v10, v12, v10
-; FOLDING-NEXT: vor.vv v8, v9, v8
-; FOLDING-NEXT: vor.vv v8, v8, v10
+; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t
+; FOLDING-NEXT: vor.vv v8, v9, v10
+; FOLDING-NEXT: vor.vv v8, v8, v11
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
%b = load <vscale x 2 x i1>, ptr %y
@@ -444,41 +444,39 @@ define <vscale x 2 x i64> @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y
}
define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) {
-; RV32-LABEL: vwop_vscale_zext_i1i32_multiple_users:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu
-; RV32-NEXT: vlm.v v0, (a0)
-; RV32-NEXT: vlm.v v8, (a2)
-; RV32-NEXT: vlm.v v9, (a1)
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vmerge.vim v11, v10, 1, v0
-; RV32-NEXT: vmv.v.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v10, 1, v0
-; RV32-NEXT: vadd.vv v10, v11, v8
-; RV32-NEXT: vsub.vv v8, v11, v8
-; RV32-NEXT: vmv.v.v v0, v9
-; RV32-NEXT: vor.vv v10, v10, v11, v0.t
-; RV32-NEXT: vor.vv v8, v10, v8
-; RV32-NEXT: ret
+; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
+; NO_FOLDING: # %bb.0:
+; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
+; NO_FOLDING-NEXT: vlm.v v0, (a0)
+; NO_FOLDING-NEXT: vlm.v v8, (a2)
+; NO_FOLDING-NEXT: vlm.v v9, (a1)
+; NO_FOLDING-NEXT: vmv.v.i v10, 0
+; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
+; NO_FOLDING-NEXT: vmv.v.v v0, v8
+; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
+; NO_FOLDING-NEXT: vmv.v.v v0, v9
+; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v8
+; NO_FOLDING-NEXT: ret
;
-; RV64-LABEL: vwop_vscale_zext_i1i32_multiple_users:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma
-; RV64-NEXT: vlm.v v0, (a0)
-; RV64-NEXT: vlm.v v8, (a1)
-; RV64-NEXT: vlm.v v9, (a2)
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vmerge.vim v11, v10, 1, v0
-; RV64-NEXT: vmv.v.v v0, v8
-; RV64-NEXT: vmerge.vim v8, v10, 1, v0
-; RV64-NEXT: vmv.v.v v0, v9
-; RV64-NEXT: vmerge.vim v9, v10, 1, v0
-; RV64-NEXT: vmul.vv v8, v11, v8
-; RV64-NEXT: vadd.vv v10, v11, v9
-; RV64-NEXT: vsub.vv v9, v11, v9
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: ret
+; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users:
+; FOLDING: # %bb.0:
+; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu
+; FOLDING-NEXT: vlm.v v0, (a0)
+; FOLDING-NEXT: vlm.v v8, (a2)
+; FOLDING-NEXT: vlm.v v9, (a1)
+; FOLDING-NEXT: vmv.v.i v10, 0
+; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
+; FOLDING-NEXT: vmv.v.v v0, v8
+; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT: vadd.vv v10, v11, v8
+; FOLDING-NEXT: vsub.vv v8, v11, v8
+; FOLDING-NEXT: vmv.v.v v0, v9
+; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v8
+; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
%b = load <vscale x 2 x i1>, ptr %y
%b2 = load <vscale x 2 x i1>, ptr %z
@@ -496,40 +494,36 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y,
define <vscale x 2 x i8> @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) {
; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; NO_FOLDING: # %bb.0:
-; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; NO_FOLDING-NEXT: vlm.v v0, (a0)
-; NO_FOLDING-NEXT: vlm.v v8, (a1)
-; NO_FOLDING-NEXT: vlm.v v9, (a2)
+; NO_FOLDING-NEXT: vlm.v v8, (a2)
+; NO_FOLDING-NEXT: vlm.v v9, (a1)
; NO_FOLDING-NEXT: vmv.v.i v10, 0
; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; NO_FOLDING-NEXT: vmv1r.v v0, v8
; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
+; NO_FOLDING-NEXT: vadd.vv v10, v11, v8
+; NO_FOLDING-NEXT: vsub.vv v8, v11, v8
; NO_FOLDING-NEXT: vmv1r.v v0, v9
-; NO_FOLDING-NEXT: vmerge.vim v9, v10, 1, v0
-; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
-; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
-; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
-; NO_FOLDING-NEXT: vor.vv v8, v8, v10
-; NO_FOLDING-NEXT: vor.vv v8, v8, v9
+; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; NO_FOLDING-NEXT: vor.vv v8, v10, v8
; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users:
; FOLDING: # %bb.0:
-; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; FOLDING-NEXT: vlm.v v0, (a0)
-; FOLDING-NEXT: vlm.v v8, (a1)
-; FOLDING-NEXT: vlm.v v9, (a2)
+; FOLDING-NEXT: vlm.v v8, (a2)
+; FOLDING-NEXT: vlm.v v9, (a1)
; FOLDING-NEXT: vmv.v.i v10, 0
; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0
; FOLDING-NEXT: vmv1r.v v0, v8
; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0
+; FOLDING-NEXT: vadd.vv v10, v11, v8
+; FOLDING-NEXT: vsub.vv v8, v11, v8
; FOLDING-NEXT: vmv1r.v v0, v9
-; FOLDING-NEXT: vmerge.vim v9, v10, 1, v0
-; FOLDING-NEXT: vmul.vv v8, v11, v8
-; FOLDING-NEXT: vadd.vv v10, v11, v9
-; FOLDING-NEXT: vsub.vv v9, v11, v9
-; FOLDING-NEXT: vor.vv v8, v8, v10
-; FOLDING-NEXT: vor.vv v8, v8, v9
+; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t
+; FOLDING-NEXT: vor.vv v8, v10, v8
; FOLDING-NEXT: ret
%a = load <vscale x 2 x i1>, ptr %x
%b = load <vscale x 2 x i1>, ptr %y
@@ -594,3 +588,6 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 0d52dd7..0a5e501 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -825,3 +825,56 @@ define <vscale x 2 x i1> @select_cond_x_cond(<vscale x 2 x i1> %x, <vscale x 2 x
%a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %x, i32 %evl)
ret <vscale x 2 x i1> %a
}
+
+define <vscale x 2 x i1> @select_undef_T_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) {
+; CHECK-LABEL: select_undef_T_F:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @select_undef_undef_F(<vscale x 2 x i1> %x, i32 zeroext %evl) {
+; CHECK-LABEL: select_undef_undef_F:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> poison, <vscale x 2 x i1> undef, <vscale x 2 x i1> %x, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @select_unknown_undef_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) {
+; CHECK-LABEL: select_unknown_undef_F:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> undef, <vscale x 2 x i1> %y, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @select_unknown_T_undef(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) {
+; CHECK-LABEL: select_unknown_T_undef:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> poison, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @select_false_T_F(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %z, i32 zeroext %evl) {
+; CHECK-LABEL: select_false_T_F:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i1> %y, <vscale x 2 x i1> %z, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
+
+define <vscale x 2 x i1> @select_unknown_T_T(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, i32 zeroext %evl) {
+; CHECK-LABEL: select_unknown_T_T:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %a = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %x, <vscale x 2 x i1> %y, <vscale x 2 x i1> %y, i32 %evl)
+ ret <vscale x 2 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 770bb56..082de2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -627,3 +627,259 @@ define <vscale x 8 x i16> @vwsll_vi_nxv8i16(<vscale x 8 x i8> %a) {
%z = shl <vscale x 8 x i16> %x, splat (i16 2)
ret <vscale x 8 x i16> %z
}
+
+; ==============================================================================
+; i8 -> i64
+; ==============================================================================
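+;
+; Note: vwsll produces a 2*SEW result from SEW sources, i.e. it can only
+; double the element width. An i8 -> i64 widening is an 8x extension, so it
+; cannot be matched to a single vwsll; the tests below accordingly expect the
+; plain vzext/vsext + vsll sequence even with Zvbb enabled.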
+
+define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsext.vf8 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = sext <vscale x 2 x i8> %b to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vzext.vf8 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i64_nxv2i64_nxv2i8(<vscale x 2 x i8> %a, i64 %b) {
+; CHECK-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsll.vx v8, v10, a0
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %splat
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i32 %b) {
+; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsext.vf2 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i32 %b) {
+; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vzext.vf2 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i16 %b) {
+; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsext.vf4 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = sext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i16 %b) {
+; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vzext.vf4 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = zext <vscale x 2 x i16> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_sext(<vscale x 2 x i8> %a, i8 %b) {
+; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsext.vf8 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = sext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, i8 %b) {
+; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vzext.vf8 v12, v9
+; CHECK-NEXT: vsll.vv v8, v10, v12
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
+; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
+; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: ret
+ %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
+ %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %y = zext <vscale x 2 x i8> %splat to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, %y
+ ret <vscale x 2 x i64> %z
+}
+
+define <vscale x 2 x i64> @vwsll_vi_nxv2i64_nxv2i8(<vscale x 2 x i8> %a) {
+; CHECK-LABEL: vwsll_vi_nxv2i64_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vzext.vf8 v10, v8
+; CHECK-NEXT: vsll.vi v8, v10, 2
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2
+; CHECK-ZVBB-NEXT: ret
+ %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ %z = shl <vscale x 2 x i64> %x, splat (i64 2)
+ ret <vscale x 2 x i64> %z
+}
diff --git a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll
index 5bf2adb..07eb67d 100644
--- a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll
+++ b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll
@@ -11,3 +11,12 @@ entry:
tail call void asm sideeffect "faddq $0,$1,$2", "{f38},{f0},{f0}"(fp128 0xL0, fp128 0xL0, fp128 0xL0)
ret void
}
+
+; CHECK-LABEL: test_twinword_error
+; CHECK: error: Hi part of pair should point to an even-numbered register
+; CHECK: error: (note that in some cases it might be necessary to manually bind the input/output registers instead of relying on automatic allocation)
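+;
+; A 64-bit value on 32-bit SPARC occupies an even/odd register pair, so the
+; Hi half must be an even-numbered register; binding the output to the odd
+; register %i1 via "={i1}" is what triggers the errors above.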
+
+define i64 @test_twinword_error(){
+ %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i1}"()
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll
index ec27598..9817d7c 100644
--- a/llvm/test/CodeGen/SPARC/inlineasm.ll
+++ b/llvm/test/CodeGen/SPARC/inlineasm.ll
@@ -143,3 +143,12 @@ entry:
%1 = call double asm sideeffect "faddd $1, $2, $0", "=f,f,e"(i64 0, i64 0)
ret void
}
+
+; CHECK-LABEL: test_twinword
+; CHECK: rd %asr5, %i1
+; CHECK: srlx %i1, 32, %i0
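+;
+; ${0:L} and ${0:H} select the low and high registers of the 64-bit pair;
+; with the output bound to the even register %i0, the pair is %i0:%i1, so the
+; read lands in %i1 (low) and the shifted upper half in %i0 (high).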
+
+define i64 @test_twinword(){
+ %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i0}"()
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll
index a4ca3aa..6057bf38 100644
--- a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll
+++ b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll
@@ -1,10 +1,14 @@
-; REQUIRES: spirv-tools
-; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | not spirv-val 2>&1 | FileCheck %s
+; All OpVariable instructions in a function must be the first instructions in the first block

-; TODO(#66261): The SPIR-V backend should reorder OpVariable instructions so this doesn't fail,
-; but in the meantime it's a good example of the spirv-val tool working as intended.
+; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | spirv-val %}

-; CHECK: All OpVariable instructions in a function must be the first instructions in the first block.
+; CHECK-SPIRV: OpFunction
+; CHECK-SPIRV-NEXT: OpLabel
+; CHECK-SPIRV-NEXT: OpVariable
+; CHECK-SPIRV-NEXT: OpVariable
+; CHECK-SPIRV: OpReturn
+; CHECK-SPIRV: OpFunctionEnd

define void @main() #1 {
entry:
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
index 1071d34..b039f80 100644
--- a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll
@@ -10,22 +10,46 @@
; CHECK-SPIRV-DAG: OpName %[[FooObj:.*]] "foo_object"
; CHECK-SPIRV-DAG: OpName %[[FooMemOrder:.*]] "mem_order"
; CHECK-SPIRV-DAG: OpName %[[FooFunc:.*]] "foo"
+
; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid
+; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]]
; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer CrossWorkgroup %[[TyLong]]
; CHECK-SPIRV-DAG: %[[TyFunPtrLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrLong]]
-; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]]
+; CHECK-SPIRV-DAG: %[[TyGenPtrPtrLong:.*]] = OpTypePointer Generic %[[TyGenPtrLong]]
; CHECK-SPIRV-DAG: %[[TyFunGenPtrLongLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrLong]] %[[TyLong]]
+; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]]
+; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]]
+; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]]
; CHECK-SPIRV-DAG: %[[Const3:.*]] = OpConstant %[[TyLong]] 3
+
; CHECK-SPIRV: %[[FunTest]] = OpFunction %[[TyVoid]] None %[[TyFunPtrLong]]
; CHECK-SPIRV: %[[ArgCum]] = OpFunctionParameter %[[TyPtrLong]]
+
; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[Addr]] %[[Const3]]
+
+; CHECK-SPIRV: %[[HalfAddr:.*]] = OpPtrCastToGeneric
+; CHECK-SPIRV-NEXT: %[[HalfAddrCasted:.*]] = OpBitcast %[[TyGenPtrLong]] %[[HalfAddr]]
+; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[HalfAddrCasted]] %[[Const3]]
+
+; CHECK-SPIRV: %[[DblAddr:.*]] = OpPtrCastToGeneric
+; CHECK-SPIRV-NEXT: %[[DblAddrCasted:.*]] = OpBitcast %[[TyGenPtrLong]] %[[DblAddr]]
+; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[DblAddrCasted]] %[[Const3]]
+
; CHECK-SPIRV: %[[FooStub]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]]
; CHECK-SPIRV: %[[StubObj]] = OpFunctionParameter %[[TyGenPtrLong]]
; CHECK-SPIRV: %[[MemOrder]] = OpFunctionParameter %[[TyLong]]
+
+; CHECK-SPIRV: %[[ObjectAddr:.*]] = OpVariable %[[TyFunPtrGenPtrChar]] Function
+; CHECK-SPIRV-NEXT: %[[ToGeneric:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[ObjectAddr]]
+; CHECK-SPIRV-NEXT: %[[Casted:.*]] = OpBitcast %[[TyGenPtrPtrLong]] %[[ToGeneric]]
+; CHECK-SPIRV-NEXT: OpStore %[[Casted]] %[[StubObj]]
+
; CHECK-SPIRV: %[[FooFunc]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]]
; CHECK-SPIRV: %[[FooObj]] = OpFunctionParameter %[[TyGenPtrLong]]
; CHECK-SPIRV: %[[FooMemOrder]] = OpFunctionParameter %[[TyLong]]
+
; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooStub]] %[[FooObj]] %[[FooMemOrder]]
define spir_kernel void @test(ptr addrspace(1) noundef align 4 %_arg_cum) {
diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll
new file mode 100644
index 0000000..edb31ff
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll
@@ -0,0 +1,60 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-SPIRV-DAG: OpName %[[Foo:.*]] "foo"
+; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0
+; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid
+; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]]
+; CHECK-SPIRV-DAG: %[[TyFunBar:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrChar]]
+; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 64 0
+; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]]
+; CHECK-SPIRV-DAG: %[[TyFunFoo:.*]] = OpTypeFunction %[[TyVoid]] %[[TyLong]] %[[TyGenPtrPtrChar]] %[[TyGenPtrPtrChar]]
+; CHECK-SPIRV-DAG: %[[TyStruct:.*]] = OpTypeStruct %[[TyLong]]
+; CHECK-SPIRV-DAG: %[[Const100:.*]] = OpConstant %[[TyLong]] 100
+; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]]
+; CHECK-SPIRV-DAG: %[[TyPtrStruct:.*]] = OpTypePointer Generic %[[TyStruct]]
+; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer Generic %[[TyLong]]
+
+; CHECK-SPIRV: %[[Bar:.*]] = OpFunction %[[TyVoid]] None %[[TyFunBar]]
+; CHECK-SPIRV: %[[BarArg:.*]] = OpFunctionParameter %[[TyGenPtrChar]]
+; CHECK-SPIRV-NEXT: OpLabel
+; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function
+; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function
+; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function
+; CHECK-SPIRV: %[[Var1:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]]
+; CHECK-SPIRV: %[[Var2:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]]
+; CHECK-SPIRV: OpStore %[[#]] %[[BarArg]]
+; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var1]] %[[Var2]]
+; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var2]] %[[Var1]]
+
+; CHECK-SPIRV: %[[Foo]] = OpFunction %[[TyVoid]] None %[[TyFunFoo]]
+; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyLong]]
+; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]]
+; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]]
+
+%class.CustomType = type { i64 }
+
+define linkonce_odr dso_local spir_func void @bar(ptr addrspace(4) noundef %first) {
+entry:
+ %first.addr = alloca ptr addrspace(4)
+ %first.addr.ascast = addrspacecast ptr %first.addr to ptr addrspace(4)
+ %temp = alloca ptr addrspace(4), align 8
+ %temp.ascast = addrspacecast ptr %temp to ptr addrspace(4)
+ store ptr addrspace(4) %first, ptr %first.addr
+ call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast)
+ call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast)
+ %var = alloca ptr addrspace(4), align 8
+ ret void
+}
+
+define linkonce_odr dso_local spir_func void @foo(i64 noundef %offset, ptr addrspace(4) noundef dereferenceable(8) %in_acc1, ptr addrspace(4) noundef dereferenceable(8) %out_acc1) {
+entry:
+ %r0 = load ptr addrspace(4), ptr addrspace(4) %in_acc1
+ %arrayidx = getelementptr inbounds %class.CustomType, ptr addrspace(4) %r0, i64 42
+ %r1 = load i64, ptr addrspace(4) %arrayidx
+ %r3 = load ptr addrspace(4), ptr addrspace(4) %out_acc1
+ %r4 = getelementptr %class.CustomType, ptr addrspace(4) %r3, i64 43
+ store i64 %r1, ptr addrspace(4) %r4
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll
index 3429cd5..293a1b3 100644
--- a/llvm/test/CodeGen/WebAssembly/multi-return.ll
+++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll
@@ -78,18 +78,16 @@ define i64 @test4() {
define { i64, i128 } @test5() {
; CHECK-LABEL: test5:
; CHECK: call return_multi_multi
-; CHECK: i32.const $push8=, 8
-; CHECK: i32.add $push9=, $[[SP:[0-9]+]], $pop8
-; CHECK: i32.const $push0=, 16
-; CHECK: i32.add $push1=, $pop9, $pop0
+; CHECK: i32.const $push0=, 24
+; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
; CHECK: i64.load $push2=, 16($[[SP]])
; CHECK: i64.store 8($0), $pop2
+; CHECK: i64.store 16($0), $[[L1]]
; CHECK: i64.store 0($0), $[[L2]]
-; CHECK: i32.const $push12=, 16
-; CHECK: i32.add $push3=, $0, $pop12
-; CHECK: i64.store 0($pop3), $[[L1]]
+; CHECK: i32.const $push5=, 80
+; CHECK: i32.add $push6=, $3, $pop5
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
@@ -101,20 +99,20 @@ define { i64, i128 } @test5() {
define { i128, i128 } @test6() {
; CHECK-LABEL: test6:
; CHECK: call return_multi_multi
-; CHECK: i32.const $push0=, 64
+; CHECK: i32.const $push0=, 24
; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
-; CHECK: i32.const $push2=, 24
+; CHECK: i32.const $push2=, 64
; CHECK: i32.add $push3=, $[[SP]], $pop2
; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]])
; CHECK: i64.load $push4=, 56($[[SP]])
; CHECK: i64.store 16($0), $pop4
+; CHECK: i64.store 24($0), $[[L2]]
; CHECK: i64.store 0($0), $[[L3]]
-; CHECK: i64.store 8($0), $[[L2]]
-; CHECK: i32.const $push5=, 24
-; CHECK: i32.add $push6=, $0, $pop5
-; CHECK: i64.store 0($pop6), $[[L1]]
+; CHECK: i64.store 8($0), $[[L1]]
+; CHECK: i32.const $push7=, 80
+; CHECK: i32.add $push8=, $4, $pop7
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
%r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3
@@ -129,19 +127,17 @@ define { i64, i192 } @test7() {
; CHECK: i32.const $push0=, 40
; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
+; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
+; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]])
; CHECK: i32.const $push2=, 48
; CHECK: i32.add $push3=, $[[SP]], $pop2
-; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
-; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]])
-; CHECK: i64.load $push4=, 32($[[SP]])
-; CHECK: i64.store 8($0), $pop4
-; CHECK: i64.store 0($0), $[[L3]]
-; CHECK: i32.const $push5=, 24
-; CHECK: i32.add $push6=, $0, $pop5
-; CHECK: i64.store 0($pop6), $[[L2]]
-; CHECK: i32.const $push7=, 16
-; CHECK: i32.add $push8=, $0, $pop7
-; CHECK: i64.store 0($pop8), $[[L1]]
+; CHECK: i64.load $push4=, 0($pop3)
+; CHECK: i64.store 24($0), $pop4
+; CHECK: i64.store 8($0), $[[L3]]
+; CHECK: i64.store 16($0), $[[L1]]
+; CHECK: i64.store 0($0), $[[L2]]
+; CHECK: i32.const $push7=, 80
+; CHECK: i32.add $push8=, $4, $pop7
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2
@@ -153,18 +149,16 @@ define { i64, i192 } @test7() {
define { i128, i192, i128, i64 } @test8() {
; CHECK-LABEL: test8:
; CHECK: call return_multi_multi
-; CHECK: i32.const $push18=, 8
-; CHECK: i32.add $push19=, $[[SP:[0-9]+]], $pop18
-; CHECK: i32.const $push0=, 32
-; CHECK: i32.add $push1=, $pop19, $pop0
+; CHECK: i32.const $push0=, 64
+; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
-; CHECK: i32.const $push2=, 48
+; CHECK: i32.const $push2=, 40
; CHECK: i32.add $push3=, $[[SP]], $pop2
; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
-; CHECK: i32.const $push4=, 24
+; CHECK: i32.const $push4=, 48
; CHECK: i32.add $push5=, $[[SP]], $pop4
; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5)
-; CHECK: i32.const $push6=, 64
+; CHECK: i32.const $push6=, 24
; CHECK: i32.add $push7=, $[[SP]], $pop6
; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7)
; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]])
@@ -172,19 +166,15 @@ define { i128, i192, i128, i64 } @test8() {
; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]])
; CHECK: i64.load $push8=, 16($[[SP]])
; CHECK: i64.store 40($0), $pop8
+; CHECK: i64.store 48($0), $[[L4]]
+; CHECK: i64.store 32($0), $[[L3]]
; CHECK: i64.store 16($0), $[[L7]]
+; CHECK: i64.store 24($0), $[[L2]]
; CHECK: i64.store 0($0), $[[L6]]
-; CHECK: i64.store 8($0), $[[L4]]
+; CHECK: i64.store 8($0), $[[L1]]
; CHECK: i64.store 56($0), $[[L5]]
-; CHECK: i32.const $push9=, 48
-; CHECK: i32.add $push10=, $0, $pop9
-; CHECK: i64.store 0($pop10), $[[L3]]
-; CHECK: i32.const $push22=, 32
-; CHECK: i32.add $push11=, $0, $pop22
-; CHECK: i64.store 0($pop11), $[[L2]]
-; CHECK: i32.const $push12=, 24
-; CHECK: i32.add $push13=, $0, $pop12
-; CHECK: i64.store 0($pop13), $[[L1]]
+; CHECK: i32.const $push11=, 80
+; CHECK: i32.add $push12=, $8, $pop11
%t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
%r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
%r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 3a806b9..761a754 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -31,60 +31,38 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: add_v16i8:
; NO-SIMD128: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.add $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.add $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.add $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.add $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.add $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.add $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.add $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.add $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.add $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.add $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.add $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.add $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.add $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.add $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.add $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.add $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.add $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.add $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.add $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.add $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.add $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.add $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.add $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.add $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.add $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.add $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.add $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.add $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.add $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: add_v16i8:
@@ -96,54 +74,32 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.add $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.add $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.add $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.add $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.add $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = add <16 x i8> %x, %y
ret <16 x i8> %a
@@ -165,60 +121,38 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: sub_v16i8:
; NO-SIMD128: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.sub $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.sub $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.sub $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.sub $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.sub $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.sub $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.sub $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.sub $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.sub $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.sub $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.sub $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.sub $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.sub $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.sub $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.sub $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.sub $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.sub $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.sub $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.sub $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.sub $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.sub $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.sub $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.sub $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.sub $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.sub $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.sub $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.sub $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.sub $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.sub $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.sub $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.sub $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.sub $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: sub_v16i8:
@@ -230,54 +164,32 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = sub <16 x i8> %x, %y
ret <16 x i8> %a
@@ -425,60 +337,38 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: mul_v16i8:
; NO-SIMD128: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.mul $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.mul $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.mul $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.mul $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.mul $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.mul $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.mul $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.mul $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.mul $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.mul $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.mul $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.mul $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.mul $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.mul $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.mul $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.mul $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.mul $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.mul $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.mul $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.mul $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.mul $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.mul $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.mul $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.mul $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.mul $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.mul $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.mul $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.mul $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.mul $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.mul $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.mul $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.mul $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: mul_v16i8:
@@ -490,54 +380,32 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.mul $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.mul $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = mul <16 x i8> %x, %y
ret <16 x i8> %a
@@ -559,108 +427,86 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: min_s_v16i8:
; NO-SIMD128: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 15
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32
; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31
-; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push16=, 13
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
+; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31
+; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop7
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14
+; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30
+; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29
; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push22=, 12
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29
-; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push28=, 11
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12
-; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28
+; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop15
+; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28
+; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop19
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11
+; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27
+; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop23
+; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10
+; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26
; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24
-; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push34=, 10
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11
-; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27
-; NO-SIMD128-NEXT: i32.lt_s $push32=, $pop31, $pop30
-; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push40=, 9
-; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40
-; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10
-; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop27
+; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25
+; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28
+; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop31
+; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8
+; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24
+; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32
+; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop35
+; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7
+; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23
; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38
-; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39
-; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9
-; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25
-; NO-SIMD128-NEXT: i32.lt_s $push44=, $pop43, $pop42
-; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop45
-; NO-SIMD128-NEXT: i32.const $push50=, 7
-; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50
-; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8
-; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24
-; NO-SIMD128-NEXT: i32.lt_s $push48=, $pop47, $pop46
-; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48
-; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, 6
-; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56
-; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7
-; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23
+; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop39
+; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6
+; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22
+; NO-SIMD128-NEXT: i32.lt_s $push42=, $pop41, $pop40
+; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop43
+; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5
+; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21
+; NO-SIMD128-NEXT: i32.lt_s $push46=, $pop45, $pop44
+; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop47
+; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4
+; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20
+; NO-SIMD128-NEXT: i32.lt_s $push50=, $pop49, $pop48
+; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop51
+; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3
+; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19
; NO-SIMD128-NEXT: i32.lt_s $push54=, $pop53, $pop52
-; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54
-; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55
-; NO-SIMD128-NEXT: i32.const $push62=, 5
-; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62
-; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6
-; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22
-; NO-SIMD128-NEXT: i32.lt_s $push60=, $pop59, $pop58
-; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60
-; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61
-; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5
-; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21
-; NO-SIMD128-NEXT: i32.lt_s $push66=, $pop65, $pop64
-; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop67
-; NO-SIMD128-NEXT: i32.const $push72=, 3
-; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4
-; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20
-; NO-SIMD128-NEXT: i32.lt_s $push70=, $pop69, $pop68
-; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70
-; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3
-; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19
-; NO-SIMD128-NEXT: i32.lt_s $push76=, $pop75, $pop74
-; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop77
-; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2
-; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18
-; NO-SIMD128-NEXT: i32.lt_s $push80=, $pop79, $pop78
-; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop81
-; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1
-; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17
-; NO-SIMD128-NEXT: i32.lt_s $push84=, $pop83, $pop82
-; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop85
+; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop55
+; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2
+; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18
+; NO-SIMD128-NEXT: i32.lt_s $push58=, $pop57, $pop56
+; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop59
+; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1
+; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17
+; NO-SIMD128-NEXT: i32.lt_s $push62=, $pop61, $pop60
+; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop63
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_s_v16i8:
@@ -681,93 +527,71 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20
; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22
-; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24
+; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24
; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34
-; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25
+; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push34=, $pop33, $pop32
+; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push38=, $pop37, $pop36
+; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27
; NO-SIMD128-FAST-NEXT: i32.lt_s $push42=, $pop41, $pop40
-; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26
+; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28
; NO-SIMD128-FAST-NEXT: i32.lt_s $push46=, $pop45, $pop44
-; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push52=, $pop51, $pop50
-; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28
+; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push50=, $pop49, $pop48
+; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push54=, $pop53, $pop52
+; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31
; NO-SIMD128-FAST-NEXT: i32.lt_s $push58=, $pop57, $pop56
-; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push64=, $pop63, $pop62
-; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65
-; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push70=, $pop69, $pop68
-; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push76=, $pop75, $pop74
-; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77
-; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push82=, $pop81, $pop80
-; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83
+; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push62=, $pop61, $pop60
+; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63
; NO-SIMD128-FAST-NEXT: return
%c = icmp slt <16 x i8> %x, %y
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
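(Editor's note: the checks above are the scalar lowering of a lane-wise signed minimum. WebAssembly has no 8-bit compare, so each byte is sign-extended with i32.extend8_s, compared with i32.lt_s, and the untouched operand is chosen with select; the new output only changes where the result byte is stored, as in the earlier hunks. A minimal sketch of one lane, assuming the sign-extension ops feature the tests already rely on; the function name is hypothetical.)

(module
  ;; min_s of two bytes held in i32 locals: select pops the condition,
  ;; then returns $a when (sext8 $a) < (sext8 $b), else $b.
  (func $min_s_i8_lane (param $a i32) (param $b i32) (result i32)
    local.get $a        ;; select operand 1
    local.get $b        ;; select operand 2
    local.get $a
    i32.extend8_s       ;; sign-extend low byte of $a
    local.get $b
    i32.extend8_s       ;; sign-extend low byte of $b
    i32.lt_s            ;; condition: $a < $b as signed i8
    select))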
@@ -790,140 +614,118 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: min_u_v16i8:
; NO-SIMD128: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 15
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.const $push0=, 255
; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
-; NO-SIMD128-NEXT: i32.const $push117=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117
+; NO-SIMD128-NEXT: i32.const $push95=, 255
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95
; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3
-; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push116=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116
-; NO-SIMD128-NEXT: i32.const $push115=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115
-; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 13
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push114=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114
-; NO-SIMD128-NEXT: i32.const $push113=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113
-; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push23=, 12
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push112=, 255
-; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112
-; NO-SIMD128-NEXT: i32.const $push111=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111
-; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push110=, 255
-; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110
-; NO-SIMD128-NEXT: i32.const $push109=, 255
-; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109
-; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25
-; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push35=, 10
-; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-NEXT: i32.const $push108=, 255
-; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108
-; NO-SIMD128-NEXT: i32.const $push107=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107
-; NO-SIMD128-NEXT: i32.lt_u $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33
-; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34
-; NO-SIMD128-NEXT: i32.const $push41=, 9
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push106=, 255
-; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106
-; NO-SIMD128-NEXT: i32.const $push105=, 255
-; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105
-; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push104=, 255
-; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104
-; NO-SIMD128-NEXT: i32.const $push103=, 255
-; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103
-; NO-SIMD128-NEXT: i32.lt_u $push45=, $pop44, $pop43
-; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop46
-; NO-SIMD128-NEXT: i32.const $push51=, 7
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.const $push102=, 255
-; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102
-; NO-SIMD128-NEXT: i32.const $push101=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101
-; NO-SIMD128-NEXT: i32.lt_u $push49=, $pop48, $pop47
-; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.const $push57=, 6
-; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57
-; NO-SIMD128-NEXT: i32.const $push100=, 255
-; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100
-; NO-SIMD128-NEXT: i32.const $push99=, 255
-; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99
-; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53
-; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55
-; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56
-; NO-SIMD128-NEXT: i32.const $push63=, 5
-; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-NEXT: i32.const $push98=, 255
-; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 255
-; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97
-; NO-SIMD128-NEXT: i32.lt_u $push61=, $pop60, $pop59
-; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-NEXT: i32.const $push96=, 255
-; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 255
-; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95
-; NO-SIMD128-NEXT: i32.lt_u $push67=, $pop66, $pop65
-; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop68
-; NO-SIMD128-NEXT: i32.const $push73=, 3
-; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
; NO-SIMD128-NEXT: i32.const $push94=, 255
-; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94
+; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94
; NO-SIMD128-NEXT: i32.const $push93=, 255
-; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93
-; NO-SIMD128-NEXT: i32.lt_u $push71=, $pop70, $pop69
-; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71
-; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72
+; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93
+; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
; NO-SIMD128-NEXT: i32.const $push92=, 255
-; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92
+; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92
; NO-SIMD128-NEXT: i32.const $push91=, 255
-; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91
-; NO-SIMD128-NEXT: i32.lt_u $push77=, $pop76, $pop75
-; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop78
+; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91
+; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
; NO-SIMD128-NEXT: i32.const $push90=, 255
-; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90
+; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90
; NO-SIMD128-NEXT: i32.const $push89=, 255
-; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89
-; NO-SIMD128-NEXT: i32.lt_u $push81=, $pop80, $pop79
-; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop82
+; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89
+; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
; NO-SIMD128-NEXT: i32.const $push88=, 255
-; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88
+; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88
; NO-SIMD128-NEXT: i32.const $push87=, 255
-; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87
-; NO-SIMD128-NEXT: i32.lt_u $push85=, $pop84, $pop83
-; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop86
+; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87
+; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push86=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86
+; NO-SIMD128-NEXT: i32.const $push85=, 255
+; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85
+; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push84=, 255
+; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84
+; NO-SIMD128-NEXT: i32.const $push83=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83
+; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push82=, 255
+; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82
+; NO-SIMD128-NEXT: i32.const $push81=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81
+; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
+; NO-SIMD128-NEXT: i32.const $push80=, 255
+; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80
+; NO-SIMD128-NEXT: i32.const $push79=, 255
+; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79
+; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33
+; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push78=, 255
+; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78
+; NO-SIMD128-NEXT: i32.const $push77=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77
+; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
+; NO-SIMD128-NEXT: i32.const $push76=, 255
+; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76
+; NO-SIMD128-NEXT: i32.const $push75=, 255
+; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75
+; NO-SIMD128-NEXT: i32.lt_u $push43=, $pop42, $pop41
+; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
+; NO-SIMD128-NEXT: i32.const $push74=, 255
+; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74
+; NO-SIMD128-NEXT: i32.const $push73=, 255
+; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73
+; NO-SIMD128-NEXT: i32.lt_u $push47=, $pop46, $pop45
+; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
+; NO-SIMD128-NEXT: i32.const $push72=, 255
+; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72
+; NO-SIMD128-NEXT: i32.const $push71=, 255
+; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71
+; NO-SIMD128-NEXT: i32.lt_u $push51=, $pop50, $pop49
+; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
+; NO-SIMD128-NEXT: i32.const $push70=, 255
+; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 255
+; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69
+; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53
+; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
+; NO-SIMD128-NEXT: i32.const $push68=, 255
+; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 255
+; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67
+; NO-SIMD128-NEXT: i32.lt_u $push59=, $pop58, $pop57
+; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
+; NO-SIMD128-NEXT: i32.const $push66=, 255
+; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66
+; NO-SIMD128-NEXT: i32.const $push65=, 255
+; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65
+; NO-SIMD128-NEXT: i32.lt_u $push63=, $pop62, $pop61
+; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_u_v16i8:
@@ -931,138 +733,116 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95
; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94
+; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93
; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114
-; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91
; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111
+; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89
; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108
-; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106
-; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82
+; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81
; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101
+; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push35=, $pop34, $pop33
+; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75
; NO-SIMD128-FAST-NEXT: i32.lt_u $push43=, $pop42, $pop41
-; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100
-; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99
+; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73
; NO-SIMD128-FAST-NEXT: i32.lt_u $push47=, $pop46, $pop45
-; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push53=, $pop52, $pop51
-; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95
+; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push51=, $pop50, $pop49
+; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push55=, $pop54, $pop53
+; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67
; NO-SIMD128-FAST-NEXT: i32.lt_u $push59=, $pop58, $pop57
-; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push65=, $pop64, $pop63
-; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66
-; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push71=, $pop70, $pop69
-; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push77=, $pop76, $pop75
-; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88
-; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push83=, $pop82, $pop81
-; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84
+; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push63=, $pop62, $pop61
+; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64
; NO-SIMD128-FAST-NEXT: return
%c = icmp ult <16 x i8> %x, %y
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
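The hunks in this patch all follow one pattern: WebAssembly memory instructions carry an immediate byte offset, so a store to `base + k` no longer needs to materialize the address with an explicit `i32.const`/`i32.add` pair. A minimal before/after sketch for lane 3 of the unsigned-min lowering, taken directly from the hunk above:

    ; before: compute the address explicitly, store at offset 0
    i32.const  $push17=, 3
    i32.add    $push18=, $0, $pop17
    i32.store8 0($pop18), $pop16
    ; after: fold the constant into the store's offset field
    i32.store8 3($0), $pop16

Dropping the address arithmetic also shrinks the value-stack numbering, which appears to be why every later `$pushN`/`$popN` index in the expectations shifts down (e.g. `$push116` becomes `$push94`).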
@@ -1085,108 +865,86 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: max_s_v16i8:
; NO-SIMD128: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 15
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32
; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31
-; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push16=, 13
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
+; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31
+; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop7
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14
+; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30
+; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29
; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push22=, 12
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29
-; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push28=, 11
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12
-; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28
+; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop15
+; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28
+; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop19
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11
+; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27
+; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20
+; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop23
+; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10
+; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26
; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24
-; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push34=, 10
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11
-; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27
-; NO-SIMD128-NEXT: i32.gt_s $push32=, $pop31, $pop30
-; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push40=, 9
-; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40
-; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10
-; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop27
+; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25
+; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28
+; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop31
+; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8
+; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24
+; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32
+; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop35
+; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7
+; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23
; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38
-; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39
-; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9
-; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25
-; NO-SIMD128-NEXT: i32.gt_s $push44=, $pop43, $pop42
-; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop45
-; NO-SIMD128-NEXT: i32.const $push50=, 7
-; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50
-; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8
-; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24
-; NO-SIMD128-NEXT: i32.gt_s $push48=, $pop47, $pop46
-; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48
-; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, 6
-; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56
-; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7
-; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23
+; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop39
+; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6
+; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22
+; NO-SIMD128-NEXT: i32.gt_s $push42=, $pop41, $pop40
+; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop43
+; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5
+; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21
+; NO-SIMD128-NEXT: i32.gt_s $push46=, $pop45, $pop44
+; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop47
+; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4
+; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20
+; NO-SIMD128-NEXT: i32.gt_s $push50=, $pop49, $pop48
+; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop51
+; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3
+; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19
; NO-SIMD128-NEXT: i32.gt_s $push54=, $pop53, $pop52
-; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54
-; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55
-; NO-SIMD128-NEXT: i32.const $push62=, 5
-; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62
-; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6
-; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22
-; NO-SIMD128-NEXT: i32.gt_s $push60=, $pop59, $pop58
-; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60
-; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61
-; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5
-; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21
-; NO-SIMD128-NEXT: i32.gt_s $push66=, $pop65, $pop64
-; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop67
-; NO-SIMD128-NEXT: i32.const $push72=, 3
-; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4
-; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20
-; NO-SIMD128-NEXT: i32.gt_s $push70=, $pop69, $pop68
-; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70
-; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3
-; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19
-; NO-SIMD128-NEXT: i32.gt_s $push76=, $pop75, $pop74
-; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop77
-; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2
-; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18
-; NO-SIMD128-NEXT: i32.gt_s $push80=, $pop79, $pop78
-; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop81
-; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1
-; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17
-; NO-SIMD128-NEXT: i32.gt_s $push84=, $pop83, $pop82
-; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop85
+; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop55
+; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2
+; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18
+; NO-SIMD128-NEXT: i32.gt_s $push58=, $pop57, $pop56
+; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop59
+; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1
+; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17
+; NO-SIMD128-NEXT: i32.gt_s $push62=, $pop61, $pop60
+; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop63
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_s_v16i8:
@@ -1207,93 +965,71 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20
; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22
-; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20
+; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24
+; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24
; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34
-; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25
+; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push34=, $pop33, $pop32
+; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push38=, $pop37, $pop36
+; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27
; NO-SIMD128-FAST-NEXT: i32.gt_s $push42=, $pop41, $pop40
-; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26
+; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28
; NO-SIMD128-FAST-NEXT: i32.gt_s $push46=, $pop45, $pop44
-; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push52=, $pop51, $pop50
-; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28
+; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push50=, $pop49, $pop48
+; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push54=, $pop53, $pop52
+; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31
; NO-SIMD128-FAST-NEXT: i32.gt_s $push58=, $pop57, $pop56
-; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push64=, $pop63, $pop62
-; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65
-; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push70=, $pop69, $pop68
-; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push76=, $pop75, $pop74
-; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77
-; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push82=, $pop81, $pop80
-; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83
+; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push62=, $pop61, $pop60
+; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63
; NO-SIMD128-FAST-NEXT: return
%c = icmp sgt <16 x i8> %x, %y
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
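For the signed comparison, the scalarized lowering sign-extends each byte lane to i32 before comparing, matching the `icmp sgt` + `select` pair in the IR above. Per lane (lane 15 shown, as in the hunk; `$0` is the pointer the result vector is stored through):

    i32.extend8_s $push1=, $16              ; sign-extend the %x lane
    i32.extend8_s $push0=, $32              ; sign-extend the %y lane
    i32.gt_s      $push2=, $pop1, $pop0     ; %c = icmp sgt
    i32.select    $push3=, $16, $32, $pop2  ; %a = select %c, %x, %y
    i32.store8    15($0), $pop3             ; offset folded into the store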
@@ -1316,140 +1052,118 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: max_u_v16i8:
; NO-SIMD128: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 15
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.const $push0=, 255
; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
-; NO-SIMD128-NEXT: i32.const $push117=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117
+; NO-SIMD128-NEXT: i32.const $push95=, 255
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95
; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3
-; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push116=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116
-; NO-SIMD128-NEXT: i32.const $push115=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115
-; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 13
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push114=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114
-; NO-SIMD128-NEXT: i32.const $push113=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113
-; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push23=, 12
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push112=, 255
-; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112
-; NO-SIMD128-NEXT: i32.const $push111=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111
-; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push110=, 255
-; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110
-; NO-SIMD128-NEXT: i32.const $push109=, 255
-; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109
-; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25
-; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push35=, 10
-; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-NEXT: i32.const $push108=, 255
-; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108
-; NO-SIMD128-NEXT: i32.const $push107=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107
-; NO-SIMD128-NEXT: i32.gt_u $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33
-; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34
-; NO-SIMD128-NEXT: i32.const $push41=, 9
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push106=, 255
-; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106
-; NO-SIMD128-NEXT: i32.const $push105=, 255
-; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105
-; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push104=, 255
-; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104
-; NO-SIMD128-NEXT: i32.const $push103=, 255
-; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103
-; NO-SIMD128-NEXT: i32.gt_u $push45=, $pop44, $pop43
-; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop46
-; NO-SIMD128-NEXT: i32.const $push51=, 7
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.const $push102=, 255
-; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102
-; NO-SIMD128-NEXT: i32.const $push101=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101
-; NO-SIMD128-NEXT: i32.gt_u $push49=, $pop48, $pop47
-; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.const $push57=, 6
-; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57
-; NO-SIMD128-NEXT: i32.const $push100=, 255
-; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100
-; NO-SIMD128-NEXT: i32.const $push99=, 255
-; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99
-; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53
-; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55
-; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56
-; NO-SIMD128-NEXT: i32.const $push63=, 5
-; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-NEXT: i32.const $push98=, 255
-; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 255
-; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97
-; NO-SIMD128-NEXT: i32.gt_u $push61=, $pop60, $pop59
-; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-NEXT: i32.const $push96=, 255
-; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 255
-; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95
-; NO-SIMD128-NEXT: i32.gt_u $push67=, $pop66, $pop65
-; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop68
-; NO-SIMD128-NEXT: i32.const $push73=, 3
-; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
; NO-SIMD128-NEXT: i32.const $push94=, 255
-; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94
+; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94
; NO-SIMD128-NEXT: i32.const $push93=, 255
-; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93
-; NO-SIMD128-NEXT: i32.gt_u $push71=, $pop70, $pop69
-; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71
-; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72
+; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93
+; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
; NO-SIMD128-NEXT: i32.const $push92=, 255
-; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92
+; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92
; NO-SIMD128-NEXT: i32.const $push91=, 255
-; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91
-; NO-SIMD128-NEXT: i32.gt_u $push77=, $pop76, $pop75
-; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop78
+; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91
+; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
; NO-SIMD128-NEXT: i32.const $push90=, 255
-; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90
+; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90
; NO-SIMD128-NEXT: i32.const $push89=, 255
-; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89
-; NO-SIMD128-NEXT: i32.gt_u $push81=, $pop80, $pop79
-; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop82
+; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89
+; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
; NO-SIMD128-NEXT: i32.const $push88=, 255
-; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88
+; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88
; NO-SIMD128-NEXT: i32.const $push87=, 255
-; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87
-; NO-SIMD128-NEXT: i32.gt_u $push85=, $pop84, $pop83
-; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop86
+; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87
+; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push86=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86
+; NO-SIMD128-NEXT: i32.const $push85=, 255
+; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85
+; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push84=, 255
+; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84
+; NO-SIMD128-NEXT: i32.const $push83=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83
+; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push82=, 255
+; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82
+; NO-SIMD128-NEXT: i32.const $push81=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81
+; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
+; NO-SIMD128-NEXT: i32.const $push80=, 255
+; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80
+; NO-SIMD128-NEXT: i32.const $push79=, 255
+; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79
+; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33
+; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push78=, 255
+; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78
+; NO-SIMD128-NEXT: i32.const $push77=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77
+; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
+; NO-SIMD128-NEXT: i32.const $push76=, 255
+; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76
+; NO-SIMD128-NEXT: i32.const $push75=, 255
+; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75
+; NO-SIMD128-NEXT: i32.gt_u $push43=, $pop42, $pop41
+; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
+; NO-SIMD128-NEXT: i32.const $push74=, 255
+; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74
+; NO-SIMD128-NEXT: i32.const $push73=, 255
+; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73
+; NO-SIMD128-NEXT: i32.gt_u $push47=, $pop46, $pop45
+; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
+; NO-SIMD128-NEXT: i32.const $push72=, 255
+; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72
+; NO-SIMD128-NEXT: i32.const $push71=, 255
+; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71
+; NO-SIMD128-NEXT: i32.gt_u $push51=, $pop50, $pop49
+; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
+; NO-SIMD128-NEXT: i32.const $push70=, 255
+; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 255
+; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69
+; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53
+; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
+; NO-SIMD128-NEXT: i32.const $push68=, 255
+; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 255
+; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67
+; NO-SIMD128-NEXT: i32.gt_u $push59=, $pop58, $pop57
+; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
+; NO-SIMD128-NEXT: i32.const $push66=, 255
+; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66
+; NO-SIMD128-NEXT: i32.const $push65=, 255
+; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65
+; NO-SIMD128-NEXT: i32.gt_u $push63=, $pop62, $pop61
+; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_u_v16i8:
@@ -1457,138 +1171,116 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95
; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94
+; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93
; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114
-; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91
; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111
+; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89
; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108
-; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106
-; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82
+; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81
; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101
+; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push35=, $pop34, $pop33
+; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75
; NO-SIMD128-FAST-NEXT: i32.gt_u $push43=, $pop42, $pop41
-; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100
-; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99
+; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73
; NO-SIMD128-FAST-NEXT: i32.gt_u $push47=, $pop46, $pop45
-; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push53=, $pop52, $pop51
-; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95
+; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push51=, $pop50, $pop49
+; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push55=, $pop54, $pop53
+; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67
; NO-SIMD128-FAST-NEXT: i32.gt_u $push59=, $pop58, $pop57
-; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push65=, $pop64, $pop63
-; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66
-; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push71=, $pop70, $pop69
-; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push77=, $pop76, $pop75
-; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88
-; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push83=, $pop82, $pop81
-; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84
+; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push63=, $pop62, $pop61
+; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64
; NO-SIMD128-FAST-NEXT: return
%c = icmp ugt <16 x i8> %x, %y
%a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
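The unsigned variant is the same shape except that lanes are zero-masked with 255 instead of sign-extended, and the comparison is `i32.gt_u`; the mask constant is rematerialized per operand, which is what the repeated `i32.const $pushN=, 255` lines are. Lane 15, copied from the hunk above:

    i32.const  $push0=, 255
    i32.and    $push2=, $16, $pop0    ; zero-extend the %x lane
    i32.const  $push95=, 255
    i32.and    $push1=, $32, $pop95   ; zero-extend the %y lane
    i32.gt_u   $push3=, $pop2, $pop1  ; %c = icmp ugt
    i32.select $push4=, $16, $32, $pop3
    i32.store8 15($0), $pop4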
@@ -1611,156 +1303,134 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: avgr_u_v16i8:
; NO-SIMD128: .functype avgr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.add $push2=, $16, $32
-; NO-SIMD128-NEXT: i32.const $push3=, 1
-; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 254
-; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
-; NO-SIMD128-NEXT: i32.const $push133=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7
-; NO-SIMD128-NEXT: i32.const $push8=, 14
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.add $push10=, $15, $31
-; NO-SIMD128-NEXT: i32.const $push132=, 1
-; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132
-; NO-SIMD128-NEXT: i32.const $push131=, 254
-; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131
-; NO-SIMD128-NEXT: i32.const $push130=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13
-; NO-SIMD128-NEXT: i32.const $push14=, 13
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.add $push16=, $14, $30
-; NO-SIMD128-NEXT: i32.const $push129=, 1
-; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129
-; NO-SIMD128-NEXT: i32.const $push128=, 254
-; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128
-; NO-SIMD128-NEXT: i32.const $push127=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.add $push22=, $13, $29
-; NO-SIMD128-NEXT: i32.const $push126=, 1
-; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126
-; NO-SIMD128-NEXT: i32.const $push125=, 254
-; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125
-; NO-SIMD128-NEXT: i32.const $push124=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25
-; NO-SIMD128-NEXT: i32.const $push26=, 11
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.add $push28=, $12, $28
-; NO-SIMD128-NEXT: i32.const $push123=, 1
-; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123
-; NO-SIMD128-NEXT: i32.const $push122=, 254
-; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122
-; NO-SIMD128-NEXT: i32.const $push121=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121
-; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31
-; NO-SIMD128-NEXT: i32.const $push32=, 10
-; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-NEXT: i32.add $push34=, $11, $27
-; NO-SIMD128-NEXT: i32.const $push120=, 1
-; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120
-; NO-SIMD128-NEXT: i32.const $push119=, 254
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119
-; NO-SIMD128-NEXT: i32.const $push118=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118
-; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37
-; NO-SIMD128-NEXT: i32.const $push38=, 9
-; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-NEXT: i32.add $push40=, $10, $26
-; NO-SIMD128-NEXT: i32.const $push117=, 1
-; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117
-; NO-SIMD128-NEXT: i32.const $push116=, 254
-; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116
-; NO-SIMD128-NEXT: i32.const $push115=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115
-; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43
-; NO-SIMD128-NEXT: i32.add $push44=, $9, $25
-; NO-SIMD128-NEXT: i32.const $push114=, 1
-; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114
-; NO-SIMD128-NEXT: i32.const $push113=, 254
-; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113
-; NO-SIMD128-NEXT: i32.const $push112=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop47
-; NO-SIMD128-NEXT: i32.const $push48=, 7
-; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-NEXT: i32.add $push50=, $8, $24
+; NO-SIMD128-NEXT: i32.add $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.const $push1=, 1
+; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1
+; NO-SIMD128-NEXT: i32.const $push3=, 254
+; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
; NO-SIMD128-NEXT: i32.const $push111=, 1
-; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111
-; NO-SIMD128-NEXT: i32.const $push110=, 254
-; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110
-; NO-SIMD128-NEXT: i32.const $push109=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109
-; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53
-; NO-SIMD128-NEXT: i32.const $push54=, 6
-; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-NEXT: i32.add $push56=, $7, $23
+; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $15, $31
+; NO-SIMD128-NEXT: i32.const $push110=, 1
+; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110
+; NO-SIMD128-NEXT: i32.const $push109=, 254
+; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109
; NO-SIMD128-NEXT: i32.const $push108=, 1
-; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108
-; NO-SIMD128-NEXT: i32.const $push107=, 254
-; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107
-; NO-SIMD128-NEXT: i32.const $push106=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106
-; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59
-; NO-SIMD128-NEXT: i32.const $push60=, 5
-; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-NEXT: i32.add $push62=, $6, $22
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $14, $30
+; NO-SIMD128-NEXT: i32.const $push107=, 1
+; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107
+; NO-SIMD128-NEXT: i32.const $push106=, 254
+; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106
; NO-SIMD128-NEXT: i32.const $push105=, 1
-; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105
-; NO-SIMD128-NEXT: i32.const $push104=, 254
-; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104
-; NO-SIMD128-NEXT: i32.const $push103=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103
-; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65
-; NO-SIMD128-NEXT: i32.add $push66=, $5, $21
+; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $13, $29
+; NO-SIMD128-NEXT: i32.const $push104=, 1
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104
+; NO-SIMD128-NEXT: i32.const $push103=, 254
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103
; NO-SIMD128-NEXT: i32.const $push102=, 1
-; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102
-; NO-SIMD128-NEXT: i32.const $push101=, 254
-; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101
-; NO-SIMD128-NEXT: i32.const $push100=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop69
-; NO-SIMD128-NEXT: i32.const $push70=, 3
-; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70
-; NO-SIMD128-NEXT: i32.add $push72=, $4, $20
+; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop17
+; NO-SIMD128-NEXT: i32.add $push18=, $12, $28
+; NO-SIMD128-NEXT: i32.const $push101=, 1
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101
+; NO-SIMD128-NEXT: i32.const $push100=, 254
+; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100
; NO-SIMD128-NEXT: i32.const $push99=, 1
-; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99
-; NO-SIMD128-NEXT: i32.const $push98=, 254
-; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97
-; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75
-; NO-SIMD128-NEXT: i32.add $push76=, $3, $19
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop21
+; NO-SIMD128-NEXT: i32.add $push22=, $11, $27
+; NO-SIMD128-NEXT: i32.const $push98=, 1
+; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98
+; NO-SIMD128-NEXT: i32.const $push97=, 254
+; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97
; NO-SIMD128-NEXT: i32.const $push96=, 1
-; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 254
-; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95
-; NO-SIMD128-NEXT: i32.const $push94=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop79
-; NO-SIMD128-NEXT: i32.add $push80=, $2, $18
+; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop25
+; NO-SIMD128-NEXT: i32.add $push26=, $10, $26
+; NO-SIMD128-NEXT: i32.const $push95=, 1
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95
+; NO-SIMD128-NEXT: i32.const $push94=, 254
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94
; NO-SIMD128-NEXT: i32.const $push93=, 1
-; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93
-; NO-SIMD128-NEXT: i32.const $push92=, 254
-; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92
-; NO-SIMD128-NEXT: i32.const $push91=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop83
-; NO-SIMD128-NEXT: i32.add $push84=, $1, $17
+; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop29
+; NO-SIMD128-NEXT: i32.add $push30=, $9, $25
+; NO-SIMD128-NEXT: i32.const $push92=, 1
+; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92
+; NO-SIMD128-NEXT: i32.const $push91=, 254
+; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91
; NO-SIMD128-NEXT: i32.const $push90=, 1
-; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90
-; NO-SIMD128-NEXT: i32.const $push89=, 254
-; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89
-; NO-SIMD128-NEXT: i32.const $push88=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop87
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop33
+; NO-SIMD128-NEXT: i32.add $push34=, $8, $24
+; NO-SIMD128-NEXT: i32.const $push89=, 1
+; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89
+; NO-SIMD128-NEXT: i32.const $push88=, 254
+; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88
+; NO-SIMD128-NEXT: i32.const $push87=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop37
+; NO-SIMD128-NEXT: i32.add $push38=, $7, $23
+; NO-SIMD128-NEXT: i32.const $push86=, 1
+; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86
+; NO-SIMD128-NEXT: i32.const $push85=, 254
+; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85
+; NO-SIMD128-NEXT: i32.const $push84=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop41
+; NO-SIMD128-NEXT: i32.add $push42=, $6, $22
+; NO-SIMD128-NEXT: i32.const $push83=, 1
+; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83
+; NO-SIMD128-NEXT: i32.const $push82=, 254
+; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82
+; NO-SIMD128-NEXT: i32.const $push81=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop45
+; NO-SIMD128-NEXT: i32.add $push46=, $5, $21
+; NO-SIMD128-NEXT: i32.const $push80=, 1
+; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80
+; NO-SIMD128-NEXT: i32.const $push79=, 254
+; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79
+; NO-SIMD128-NEXT: i32.const $push78=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop49
+; NO-SIMD128-NEXT: i32.add $push50=, $4, $20
+; NO-SIMD128-NEXT: i32.const $push77=, 1
+; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77
+; NO-SIMD128-NEXT: i32.const $push76=, 254
+; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76
+; NO-SIMD128-NEXT: i32.const $push75=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop53
+; NO-SIMD128-NEXT: i32.add $push54=, $3, $19
+; NO-SIMD128-NEXT: i32.const $push74=, 1
+; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74
+; NO-SIMD128-NEXT: i32.const $push73=, 254
+; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73
+; NO-SIMD128-NEXT: i32.const $push72=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop57
+; NO-SIMD128-NEXT: i32.add $push58=, $2, $18
+; NO-SIMD128-NEXT: i32.const $push71=, 1
+; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71
+; NO-SIMD128-NEXT: i32.const $push70=, 254
+; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop61
+; NO-SIMD128-NEXT: i32.add $push62=, $1, $17
+; NO-SIMD128-NEXT: i32.const $push68=, 1
+; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 254
+; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67
+; NO-SIMD128-NEXT: i32.const $push66=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop65
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: avgr_u_v16i8:
@@ -1771,151 +1441,129 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1
; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133
+; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5
; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18
-; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132
-; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131
-; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130
+; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110
+; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109
+; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19
-; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129
-; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128
-; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127
-; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126
-; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125
-; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123
-; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122
-; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120
-; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119
-; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117
-; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114
-; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41
-; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111
-; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46
-; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108
-; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52
-; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107
+; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106
; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105
-; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58
-; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105
+; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.const $push104=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104
+; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103
; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64
-; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100
; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69
-; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70
-; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98
+; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97
; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75
-; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76
-; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25
+; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94
; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81
-; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82
-; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91
; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33
+; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37
+; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop85
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41
+; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82
+; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45
+; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49
+; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53
+; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57
+; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop71
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61
+; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65
; NO-SIMD128-FAST-NEXT: return
%a = add nuw <16 x i8> %x, %y
%b = add nuw <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
@@ -1949,156 +1597,134 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: avgr_u_v16i8_wrap:
; NO-SIMD128: .functype avgr_u_v16i8_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.add $push2=, $16, $32
-; NO-SIMD128-NEXT: i32.const $push3=, 1
-; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 254
-; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
-; NO-SIMD128-NEXT: i32.const $push133=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7
-; NO-SIMD128-NEXT: i32.const $push8=, 14
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.add $push10=, $15, $31
-; NO-SIMD128-NEXT: i32.const $push132=, 1
-; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132
-; NO-SIMD128-NEXT: i32.const $push131=, 254
-; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131
-; NO-SIMD128-NEXT: i32.const $push130=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13
-; NO-SIMD128-NEXT: i32.const $push14=, 13
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.add $push16=, $14, $30
-; NO-SIMD128-NEXT: i32.const $push129=, 1
-; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129
-; NO-SIMD128-NEXT: i32.const $push128=, 254
-; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128
-; NO-SIMD128-NEXT: i32.const $push127=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.add $push22=, $13, $29
-; NO-SIMD128-NEXT: i32.const $push126=, 1
-; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126
-; NO-SIMD128-NEXT: i32.const $push125=, 254
-; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125
-; NO-SIMD128-NEXT: i32.const $push124=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25
-; NO-SIMD128-NEXT: i32.const $push26=, 11
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.add $push28=, $12, $28
-; NO-SIMD128-NEXT: i32.const $push123=, 1
-; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123
-; NO-SIMD128-NEXT: i32.const $push122=, 254
-; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122
-; NO-SIMD128-NEXT: i32.const $push121=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121
-; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31
-; NO-SIMD128-NEXT: i32.const $push32=, 10
-; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-NEXT: i32.add $push34=, $11, $27
-; NO-SIMD128-NEXT: i32.const $push120=, 1
-; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120
-; NO-SIMD128-NEXT: i32.const $push119=, 254
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119
-; NO-SIMD128-NEXT: i32.const $push118=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118
-; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37
-; NO-SIMD128-NEXT: i32.const $push38=, 9
-; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-NEXT: i32.add $push40=, $10, $26
-; NO-SIMD128-NEXT: i32.const $push117=, 1
-; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117
-; NO-SIMD128-NEXT: i32.const $push116=, 254
-; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116
-; NO-SIMD128-NEXT: i32.const $push115=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115
-; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43
-; NO-SIMD128-NEXT: i32.add $push44=, $9, $25
-; NO-SIMD128-NEXT: i32.const $push114=, 1
-; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114
-; NO-SIMD128-NEXT: i32.const $push113=, 254
-; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113
-; NO-SIMD128-NEXT: i32.const $push112=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop47
-; NO-SIMD128-NEXT: i32.const $push48=, 7
-; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-NEXT: i32.add $push50=, $8, $24
+; NO-SIMD128-NEXT: i32.add $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.const $push1=, 1
+; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1
+; NO-SIMD128-NEXT: i32.const $push3=, 254
+; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
; NO-SIMD128-NEXT: i32.const $push111=, 1
-; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111
-; NO-SIMD128-NEXT: i32.const $push110=, 254
-; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110
-; NO-SIMD128-NEXT: i32.const $push109=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109
-; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53
-; NO-SIMD128-NEXT: i32.const $push54=, 6
-; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-NEXT: i32.add $push56=, $7, $23
+; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $15, $31
+; NO-SIMD128-NEXT: i32.const $push110=, 1
+; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110
+; NO-SIMD128-NEXT: i32.const $push109=, 254
+; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109
; NO-SIMD128-NEXT: i32.const $push108=, 1
-; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108
-; NO-SIMD128-NEXT: i32.const $push107=, 254
-; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107
-; NO-SIMD128-NEXT: i32.const $push106=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106
-; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59
-; NO-SIMD128-NEXT: i32.const $push60=, 5
-; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-NEXT: i32.add $push62=, $6, $22
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $14, $30
+; NO-SIMD128-NEXT: i32.const $push107=, 1
+; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107
+; NO-SIMD128-NEXT: i32.const $push106=, 254
+; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106
; NO-SIMD128-NEXT: i32.const $push105=, 1
-; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105
-; NO-SIMD128-NEXT: i32.const $push104=, 254
-; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104
-; NO-SIMD128-NEXT: i32.const $push103=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103
-; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65
-; NO-SIMD128-NEXT: i32.add $push66=, $5, $21
+; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $13, $29
+; NO-SIMD128-NEXT: i32.const $push104=, 1
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104
+; NO-SIMD128-NEXT: i32.const $push103=, 254
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103
; NO-SIMD128-NEXT: i32.const $push102=, 1
-; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102
-; NO-SIMD128-NEXT: i32.const $push101=, 254
-; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101
-; NO-SIMD128-NEXT: i32.const $push100=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop69
-; NO-SIMD128-NEXT: i32.const $push70=, 3
-; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70
-; NO-SIMD128-NEXT: i32.add $push72=, $4, $20
+; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop17
+; NO-SIMD128-NEXT: i32.add $push18=, $12, $28
+; NO-SIMD128-NEXT: i32.const $push101=, 1
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101
+; NO-SIMD128-NEXT: i32.const $push100=, 254
+; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100
; NO-SIMD128-NEXT: i32.const $push99=, 1
-; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99
-; NO-SIMD128-NEXT: i32.const $push98=, 254
-; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97
-; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75
-; NO-SIMD128-NEXT: i32.add $push76=, $3, $19
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop21
+; NO-SIMD128-NEXT: i32.add $push22=, $11, $27
+; NO-SIMD128-NEXT: i32.const $push98=, 1
+; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98
+; NO-SIMD128-NEXT: i32.const $push97=, 254
+; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97
; NO-SIMD128-NEXT: i32.const $push96=, 1
-; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 254
-; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95
-; NO-SIMD128-NEXT: i32.const $push94=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop79
-; NO-SIMD128-NEXT: i32.add $push80=, $2, $18
+; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop25
+; NO-SIMD128-NEXT: i32.add $push26=, $10, $26
+; NO-SIMD128-NEXT: i32.const $push95=, 1
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95
+; NO-SIMD128-NEXT: i32.const $push94=, 254
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94
; NO-SIMD128-NEXT: i32.const $push93=, 1
-; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93
-; NO-SIMD128-NEXT: i32.const $push92=, 254
-; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92
-; NO-SIMD128-NEXT: i32.const $push91=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop83
-; NO-SIMD128-NEXT: i32.add $push84=, $1, $17
+; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop29
+; NO-SIMD128-NEXT: i32.add $push30=, $9, $25
+; NO-SIMD128-NEXT: i32.const $push92=, 1
+; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92
+; NO-SIMD128-NEXT: i32.const $push91=, 254
+; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91
; NO-SIMD128-NEXT: i32.const $push90=, 1
-; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90
-; NO-SIMD128-NEXT: i32.const $push89=, 254
-; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89
-; NO-SIMD128-NEXT: i32.const $push88=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop87
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop33
+; NO-SIMD128-NEXT: i32.add $push34=, $8, $24
+; NO-SIMD128-NEXT: i32.const $push89=, 1
+; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89
+; NO-SIMD128-NEXT: i32.const $push88=, 254
+; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88
+; NO-SIMD128-NEXT: i32.const $push87=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop37
+; NO-SIMD128-NEXT: i32.add $push38=, $7, $23
+; NO-SIMD128-NEXT: i32.const $push86=, 1
+; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86
+; NO-SIMD128-NEXT: i32.const $push85=, 254
+; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85
+; NO-SIMD128-NEXT: i32.const $push84=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop41
+; NO-SIMD128-NEXT: i32.add $push42=, $6, $22
+; NO-SIMD128-NEXT: i32.const $push83=, 1
+; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83
+; NO-SIMD128-NEXT: i32.const $push82=, 254
+; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82
+; NO-SIMD128-NEXT: i32.const $push81=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop45
+; NO-SIMD128-NEXT: i32.add $push46=, $5, $21
+; NO-SIMD128-NEXT: i32.const $push80=, 1
+; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80
+; NO-SIMD128-NEXT: i32.const $push79=, 254
+; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79
+; NO-SIMD128-NEXT: i32.const $push78=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop49
+; NO-SIMD128-NEXT: i32.add $push50=, $4, $20
+; NO-SIMD128-NEXT: i32.const $push77=, 1
+; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77
+; NO-SIMD128-NEXT: i32.const $push76=, 254
+; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76
+; NO-SIMD128-NEXT: i32.const $push75=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop53
+; NO-SIMD128-NEXT: i32.add $push54=, $3, $19
+; NO-SIMD128-NEXT: i32.const $push74=, 1
+; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74
+; NO-SIMD128-NEXT: i32.const $push73=, 254
+; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73
+; NO-SIMD128-NEXT: i32.const $push72=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop57
+; NO-SIMD128-NEXT: i32.add $push58=, $2, $18
+; NO-SIMD128-NEXT: i32.const $push71=, 1
+; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71
+; NO-SIMD128-NEXT: i32.const $push70=, 254
+; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop61
+; NO-SIMD128-NEXT: i32.add $push62=, $1, $17
+; NO-SIMD128-NEXT: i32.const $push68=, 1
+; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 254
+; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67
+; NO-SIMD128-NEXT: i32.const $push66=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop65
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: avgr_u_v16i8_wrap:
@@ -2109,151 +1735,129 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1
; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133
+; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5
; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18
-; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132
-; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131
-; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130
+; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110
+; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109
+; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19
-; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129
-; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128
-; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127
-; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126
-; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125
-; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123
-; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122
-; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120
-; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119
-; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117
-; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114
-; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41
-; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111
-; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46
-; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108
-; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52
-; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107
+; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106
; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105
-; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58
-; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105
+; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.const $push104=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104
+; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103
; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64
-; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100
; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69
-; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70
-; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98
+; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97
; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75
-; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76
-; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25
+; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94
; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81
-; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82
-; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91
; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254
-; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33
+; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37
+; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop85
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41
+; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82
+; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45
+; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49
+; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53
+; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57
+; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop71
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61
+; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254
+; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65
; NO-SIMD128-FAST-NEXT: return
%a = add <16 x i8> %x, %y
%b = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
@@ -2279,140 +1883,118 @@ define <16 x i8> @abs_v16i8(<16 x i8> %x) {
; NO-SIMD128-LABEL: abs_v16i8:
; NO-SIMD128: .functype abs_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 15
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend8_s $push0=, $16
; NO-SIMD128-NEXT: i32.const $push1=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push117=, $pop0, $pop1
-; NO-SIMD128-NEXT: local.tee $push116=, $17=, $pop117
-; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop116
+; NO-SIMD128-NEXT: i32.shr_s $push95=, $pop0, $pop1
+; NO-SIMD128-NEXT: local.tee $push94=, $17=, $pop95
+; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop94
; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $15
-; NO-SIMD128-NEXT: i32.const $push115=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push114=, $pop6, $pop115
-; NO-SIMD128-NEXT: local.tee $push113=, $16=, $pop114
-; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop113
-; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push14=, 13
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.extend8_s $push11=, $14
-; NO-SIMD128-NEXT: i32.const $push112=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push111=, $pop11, $pop112
-; NO-SIMD128-NEXT: local.tee $push110=, $16=, $pop111
-; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop110
-; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop13
-; NO-SIMD128-NEXT: i32.const $push19=, 12
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.extend8_s $push16=, $13
-; NO-SIMD128-NEXT: i32.const $push109=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push108=, $pop16, $pop109
-; NO-SIMD128-NEXT: local.tee $push107=, $16=, $pop108
-; NO-SIMD128-NEXT: i32.xor $push17=, $13, $pop107
-; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push24=, 11
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.extend8_s $push21=, $12
-; NO-SIMD128-NEXT: i32.const $push106=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push105=, $pop21, $pop106
-; NO-SIMD128-NEXT: local.tee $push104=, $16=, $pop105
-; NO-SIMD128-NEXT: i32.xor $push22=, $12, $pop104
-; NO-SIMD128-NEXT: i32.sub $push23=, $pop22, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push29=, 10
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.extend8_s $push26=, $11
-; NO-SIMD128-NEXT: i32.const $push103=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push102=, $pop26, $pop103
-; NO-SIMD128-NEXT: local.tee $push101=, $16=, $pop102
-; NO-SIMD128-NEXT: i32.xor $push27=, $11, $pop101
-; NO-SIMD128-NEXT: i32.sub $push28=, $pop27, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push34=, 9
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.extend8_s $push31=, $10
-; NO-SIMD128-NEXT: i32.const $push100=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push99=, $pop31, $pop100
-; NO-SIMD128-NEXT: local.tee $push98=, $16=, $pop99
-; NO-SIMD128-NEXT: i32.xor $push32=, $10, $pop98
-; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.extend8_s $push36=, $9
-; NO-SIMD128-NEXT: i32.const $push97=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push96=, $pop36, $pop97
-; NO-SIMD128-NEXT: local.tee $push95=, $16=, $pop96
-; NO-SIMD128-NEXT: i32.xor $push37=, $9, $pop95
-; NO-SIMD128-NEXT: i32.sub $push38=, $pop37, $16
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop38
-; NO-SIMD128-NEXT: i32.const $push94=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop94
-; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15
; NO-SIMD128-NEXT: i32.const $push93=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop39, $pop93
+; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop4, $pop93
; NO-SIMD128-NEXT: local.tee $push91=, $16=, $pop92
-; NO-SIMD128-NEXT: i32.xor $push40=, $8, $pop91
-; NO-SIMD128-NEXT: i32.sub $push41=, $pop40, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop41
-; NO-SIMD128-NEXT: i32.const $push46=, 6
-; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46
-; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7
+; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop91
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $16
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop6
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14
; NO-SIMD128-NEXT: i32.const $push90=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop43, $pop90
+; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop7, $pop90
; NO-SIMD128-NEXT: local.tee $push88=, $16=, $pop89
-; NO-SIMD128-NEXT: i32.xor $push44=, $7, $pop88
-; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45
-; NO-SIMD128-NEXT: i32.const $push51=, 5
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.extend8_s $push48=, $6
+; NO-SIMD128-NEXT: i32.xor $push8=, $14, $pop88
+; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $16
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop9
+; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13
; NO-SIMD128-NEXT: i32.const $push87=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop48, $pop87
+; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop10, $pop87
; NO-SIMD128-NEXT: local.tee $push85=, $16=, $pop86
-; NO-SIMD128-NEXT: i32.xor $push49=, $6, $pop85
-; NO-SIMD128-NEXT: i32.sub $push50=, $pop49, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.extend8_s $push53=, $5
+; NO-SIMD128-NEXT: i32.xor $push11=, $13, $pop85
+; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $16
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12
; NO-SIMD128-NEXT: i32.const $push84=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop53, $pop84
+; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop13, $pop84
; NO-SIMD128-NEXT: local.tee $push82=, $16=, $pop83
-; NO-SIMD128-NEXT: i32.xor $push54=, $5, $pop82
-; NO-SIMD128-NEXT: i32.sub $push55=, $pop54, $16
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop55
-; NO-SIMD128-NEXT: i32.const $push59=, 3
-; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59
-; NO-SIMD128-NEXT: i32.extend8_s $push56=, $4
+; NO-SIMD128-NEXT: i32.xor $push14=, $12, $pop82
+; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $16
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop15
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11
; NO-SIMD128-NEXT: i32.const $push81=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop56, $pop81
+; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop16, $pop81
; NO-SIMD128-NEXT: local.tee $push79=, $16=, $pop80
-; NO-SIMD128-NEXT: i32.xor $push57=, $4, $pop79
-; NO-SIMD128-NEXT: i32.sub $push58=, $pop57, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58
-; NO-SIMD128-NEXT: i32.extend8_s $push61=, $3
+; NO-SIMD128-NEXT: i32.xor $push17=, $11, $pop79
+; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop18
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10
; NO-SIMD128-NEXT: i32.const $push78=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop61, $pop78
+; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop19, $pop78
; NO-SIMD128-NEXT: local.tee $push76=, $16=, $pop77
-; NO-SIMD128-NEXT: i32.xor $push62=, $3, $pop76
-; NO-SIMD128-NEXT: i32.sub $push63=, $pop62, $16
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop63
-; NO-SIMD128-NEXT: i32.extend8_s $push64=, $2
+; NO-SIMD128-NEXT: i32.xor $push20=, $10, $pop76
+; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $16
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop21
+; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9
; NO-SIMD128-NEXT: i32.const $push75=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop64, $pop75
+; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop22, $pop75
; NO-SIMD128-NEXT: local.tee $push73=, $16=, $pop74
-; NO-SIMD128-NEXT: i32.xor $push65=, $2, $pop73
-; NO-SIMD128-NEXT: i32.sub $push66=, $pop65, $16
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop66
-; NO-SIMD128-NEXT: i32.extend8_s $push67=, $1
+; NO-SIMD128-NEXT: i32.xor $push23=, $9, $pop73
+; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $16
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop24
+; NO-SIMD128-NEXT: i32.extend8_s $push25=, $8
; NO-SIMD128-NEXT: i32.const $push72=, 7
-; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop67, $pop72
+; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop25, $pop72
; NO-SIMD128-NEXT: local.tee $push70=, $16=, $pop71
-; NO-SIMD128-NEXT: i32.xor $push68=, $1, $pop70
-; NO-SIMD128-NEXT: i32.sub $push69=, $pop68, $16
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop69
+; NO-SIMD128-NEXT: i32.xor $push26=, $8, $pop70
+; NO-SIMD128-NEXT: i32.sub $push27=, $pop26, $16
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop27
+; NO-SIMD128-NEXT: i32.extend8_s $push28=, $7
+; NO-SIMD128-NEXT: i32.const $push69=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop28, $pop69
+; NO-SIMD128-NEXT: local.tee $push67=, $16=, $pop68
+; NO-SIMD128-NEXT: i32.xor $push29=, $7, $pop67
+; NO-SIMD128-NEXT: i32.sub $push30=, $pop29, $16
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop30
+; NO-SIMD128-NEXT: i32.extend8_s $push31=, $6
+; NO-SIMD128-NEXT: i32.const $push66=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push65=, $pop31, $pop66
+; NO-SIMD128-NEXT: local.tee $push64=, $16=, $pop65
+; NO-SIMD128-NEXT: i32.xor $push32=, $6, $pop64
+; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop33
+; NO-SIMD128-NEXT: i32.extend8_s $push34=, $5
+; NO-SIMD128-NEXT: i32.const $push63=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push62=, $pop34, $pop63
+; NO-SIMD128-NEXT: local.tee $push61=, $16=, $pop62
+; NO-SIMD128-NEXT: i32.xor $push35=, $5, $pop61
+; NO-SIMD128-NEXT: i32.sub $push36=, $pop35, $16
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop36
+; NO-SIMD128-NEXT: i32.extend8_s $push37=, $4
+; NO-SIMD128-NEXT: i32.const $push60=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push59=, $pop37, $pop60
+; NO-SIMD128-NEXT: local.tee $push58=, $16=, $pop59
+; NO-SIMD128-NEXT: i32.xor $push38=, $4, $pop58
+; NO-SIMD128-NEXT: i32.sub $push39=, $pop38, $16
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop39
+; NO-SIMD128-NEXT: i32.extend8_s $push40=, $3
+; NO-SIMD128-NEXT: i32.const $push57=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push56=, $pop40, $pop57
+; NO-SIMD128-NEXT: local.tee $push55=, $16=, $pop56
+; NO-SIMD128-NEXT: i32.xor $push41=, $3, $pop55
+; NO-SIMD128-NEXT: i32.sub $push42=, $pop41, $16
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop42
+; NO-SIMD128-NEXT: i32.extend8_s $push43=, $2
+; NO-SIMD128-NEXT: i32.const $push54=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop43, $pop54
+; NO-SIMD128-NEXT: local.tee $push52=, $16=, $pop53
+; NO-SIMD128-NEXT: i32.xor $push44=, $2, $pop52
+; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop45
+; NO-SIMD128-NEXT: i32.extend8_s $push46=, $1
+; NO-SIMD128-NEXT: i32.const $push51=, 7
+; NO-SIMD128-NEXT: i32.shr_s $push50=, $pop46, $pop51
+; NO-SIMD128-NEXT: local.tee $push49=, $16=, $pop50
+; NO-SIMD128-NEXT: i32.xor $push47=, $1, $pop49
+; NO-SIMD128-NEXT: i32.sub $push48=, $pop47, $16
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop48
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: abs_v16i8:
@@ -2420,138 +2002,116 @@ define <16 x i8> @abs_v16i8(<16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $1
; NO-SIMD128-FAST-NEXT: i32.const $push1=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push117=, $pop0, $pop1
-; NO-SIMD128-FAST-NEXT: local.tee $push116=, $17=, $pop117
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop116
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop0, $pop1
+; NO-SIMD128-FAST-NEXT: local.tee $push94=, $17=, $pop95
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop94
; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $17
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push114=, $pop4, $pop115
-; NO-SIMD128-FAST-NEXT: local.tee $push113=, $1=, $pop114
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop113
+; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop4, $pop93
+; NO-SIMD128-FAST-NEXT: local.tee $push91=, $1=, $pop92
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop91
; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push111=, $pop7, $pop112
-; NO-SIMD128-FAST-NEXT: local.tee $push110=, $2=, $pop111
-; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop110
+; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop7, $pop90
+; NO-SIMD128-FAST-NEXT: local.tee $push88=, $2=, $pop89
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop88
; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push108=, $pop10, $pop109
-; NO-SIMD128-FAST-NEXT: local.tee $push107=, $3=, $pop108
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop107
-; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push105=, $pop15, $pop106
-; NO-SIMD128-FAST-NEXT: local.tee $push104=, $4=, $pop105
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop104
-; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push102=, $pop18, $pop103
-; NO-SIMD128-FAST-NEXT: local.tee $push101=, $5=, $pop102
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop101
-; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push99=, $pop23, $pop100
-; NO-SIMD128-FAST-NEXT: local.tee $push98=, $6=, $pop99
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop98
-; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop97
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop28, $pop96
-; NO-SIMD128-FAST-NEXT: local.tee $push94=, $7=, $pop95
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop94
-; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $7
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop30
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $9
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop32, $pop93
-; NO-SIMD128-FAST-NEXT: local.tee $push91=, $8=, $pop92
-; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop91
-; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $pop33, $8
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop35, $pop90
-; NO-SIMD128-FAST-NEXT: local.tee $push88=, $9=, $pop89
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $10, $pop88
-; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $pop36, $9
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $11
; NO-SIMD128-FAST-NEXT: i32.const $push87=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop40, $pop87
-; NO-SIMD128-FAST-NEXT: local.tee $push85=, $10=, $pop86
-; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop85
-; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $10
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop10, $pop87
+; NO-SIMD128-FAST-NEXT: local.tee $push85=, $3=, $pop86
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop85
+; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5
; NO-SIMD128-FAST-NEXT: i32.const $push84=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop45, $pop84
-; NO-SIMD128-FAST-NEXT: local.tee $push82=, $11=, $pop83
-; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop82
-; NO-SIMD128-FAST-NEXT: i32.sub $push47=, $pop46, $11
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $13
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop13, $pop84
+; NO-SIMD128-FAST-NEXT: local.tee $push82=, $4=, $pop83
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop82
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6
; NO-SIMD128-FAST-NEXT: i32.const $push81=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop50, $pop81
-; NO-SIMD128-FAST-NEXT: local.tee $push79=, $12=, $pop80
-; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $13, $pop79
-; NO-SIMD128-FAST-NEXT: i32.sub $push52=, $pop51, $12
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push55=, $14
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop16, $pop81
+; NO-SIMD128-FAST-NEXT: local.tee $push79=, $5=, $pop80
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop79
+; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7
; NO-SIMD128-FAST-NEXT: i32.const $push78=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop55, $pop78
-; NO-SIMD128-FAST-NEXT: local.tee $push76=, $13=, $pop77
-; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $14, $pop76
-; NO-SIMD128-FAST-NEXT: i32.sub $push57=, $pop56, $13
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop57
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop19, $pop78
+; NO-SIMD128-FAST-NEXT: local.tee $push76=, $6=, $pop77
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop76
+; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8
; NO-SIMD128-FAST-NEXT: i32.const $push75=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop60, $pop75
-; NO-SIMD128-FAST-NEXT: local.tee $push73=, $14=, $pop74
-; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $15, $pop73
-; NO-SIMD128-FAST-NEXT: i32.sub $push62=, $pop61, $14
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-FAST-NEXT: i32.const $push68=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push69=, $0, $pop68
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push65=, $16
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop22, $pop75
+; NO-SIMD128-FAST-NEXT: local.tee $push73=, $7=, $pop74
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop73
+; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9
; NO-SIMD128-FAST-NEXT: i32.const $push72=, 7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop65, $pop72
-; NO-SIMD128-FAST-NEXT: local.tee $push70=, $0=, $pop71
-; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $16, $pop70
-; NO-SIMD128-FAST-NEXT: i32.sub $push67=, $pop66, $0
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop69), $pop67
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop25, $pop72
+; NO-SIMD128-FAST-NEXT: local.tee $push70=, $8=, $pop71
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $9, $pop70
+; NO-SIMD128-FAST-NEXT: i32.sub $push27=, $pop26, $8
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $10
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push68=, $pop28, $pop69
+; NO-SIMD128-FAST-NEXT: local.tee $push67=, $9=, $pop68
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $10, $pop67
+; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $9
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $11
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop31, $pop66
+; NO-SIMD128-FAST-NEXT: local.tee $push64=, $10=, $pop65
+; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $11, $pop64
+; NO-SIMD128-FAST-NEXT: i32.sub $push33=, $pop32, $10
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $12
+; NO-SIMD128-FAST-NEXT: i32.const $push63=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push62=, $pop34, $pop63
+; NO-SIMD128-FAST-NEXT: local.tee $push61=, $11=, $pop62
+; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $12, $pop61
+; NO-SIMD128-FAST-NEXT: i32.sub $push36=, $pop35, $11
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $13
+; NO-SIMD128-FAST-NEXT: i32.const $push60=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push59=, $pop37, $pop60
+; NO-SIMD128-FAST-NEXT: local.tee $push58=, $12=, $pop59
+; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $13, $pop58
+; NO-SIMD128-FAST-NEXT: i32.sub $push39=, $pop38, $12
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $14
+; NO-SIMD128-FAST-NEXT: i32.const $push57=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push56=, $pop40, $pop57
+; NO-SIMD128-FAST-NEXT: local.tee $push55=, $13=, $pop56
+; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $14, $pop55
+; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $13
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push43=, $15
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push53=, $pop43, $pop54
+; NO-SIMD128-FAST-NEXT: local.tee $push52=, $14=, $pop53
+; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $15, $pop52
+; NO-SIMD128-FAST-NEXT: i32.sub $push45=, $pop44, $14
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push46=, $16
+; NO-SIMD128-FAST-NEXT: i32.const $push51=, 7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop46, $pop51
+; NO-SIMD128-FAST-NEXT: local.tee $push49=, $15=, $pop50
+; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $16, $pop49
+; NO-SIMD128-FAST-NEXT: i32.sub $push48=, $pop47, $15
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48
; NO-SIMD128-FAST-NEXT: return
%a = sub <16 x i8> zeroinitializer, %x
%b = icmp slt <16 x i8> %x, zeroinitializer
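; Annotation (editor's sketch, not emitted by update_llc_test_checks): the
; scalar NO-SIMD128 checks above encode the branchless byte-abs identity,
; computed per sign-extended lane roughly as this IR:
;   %m   = ashr i8 %x, 7   ; 0xFF if the lane is negative, 0x00 otherwise
;   %t   = xor  i8 %x, %m  ; conditional bitwise complement
;   %abs = sub  i8 %t, %m  ; adds 1 back when negative, yielding |x|
; which is exactly the extend8_s / shr_s 7 / xor / sub sequence repeated
; for each of the sixteen lanes.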
@@ -2576,75 +2136,53 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) {
; NO-SIMD128: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 0
-; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $9
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push53=, 0
-; NO-SIMD128-NEXT: i32.sub $push2=, $pop53, $5
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push52=, 0
-; NO-SIMD128-NEXT: i32.sub $push3=, $pop52, $3
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push51=, 0
-; NO-SIMD128-NEXT: i32.sub $push4=, $pop51, $2
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push50=, 0
-; NO-SIMD128-NEXT: i32.sub $push5=, $pop50, $1
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push7=, 15
-; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-NEXT: i32.const $push49=, 0
-; NO-SIMD128-NEXT: i32.sub $push6=, $pop49, $16
-; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.const $push48=, 0
-; NO-SIMD128-NEXT: i32.sub $push9=, $pop48, $15
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 13
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push47=, 0
-; NO-SIMD128-NEXT: i32.sub $push12=, $pop47, $14
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 12
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push46=, 0
-; NO-SIMD128-NEXT: i32.sub $push15=, $pop46, $13
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 11
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push45=, 0
-; NO-SIMD128-NEXT: i32.sub $push18=, $pop45, $12
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 10
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.const $push44=, 0
-; NO-SIMD128-NEXT: i32.sub $push21=, $pop44, $11
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push25=, 9
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push43=, 0
-; NO-SIMD128-NEXT: i32.sub $push24=, $pop43, $10
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push28=, 7
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.const $push42=, 0
-; NO-SIMD128-NEXT: i32.sub $push27=, $pop42, $8
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.const $push41=, 0
-; NO-SIMD128-NEXT: i32.sub $push30=, $pop41, $7
-; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30
-; NO-SIMD128-NEXT: i32.const $push34=, 5
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.const $push40=, 0
-; NO-SIMD128-NEXT: i32.sub $push33=, $pop40, $6
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push37=, 3
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.const $push39=, 0
-; NO-SIMD128-NEXT: i32.sub $push36=, $pop39, $4
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
+; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $16
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push31=, 0
+; NO-SIMD128-NEXT: i32.sub $push2=, $pop31, $15
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push30=, 0
+; NO-SIMD128-NEXT: i32.sub $push3=, $pop30, $14
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push29=, 0
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop29, $13
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push28=, 0
+; NO-SIMD128-NEXT: i32.sub $push5=, $pop28, $12
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push27=, 0
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop27, $11
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push26=, 0
+; NO-SIMD128-NEXT: i32.sub $push7=, $pop26, $10
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push25=, 0
+; NO-SIMD128-NEXT: i32.sub $push8=, $pop25, $9
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push24=, 0
+; NO-SIMD128-NEXT: i32.sub $push9=, $pop24, $8
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop9
+; NO-SIMD128-NEXT: i32.const $push23=, 0
+; NO-SIMD128-NEXT: i32.sub $push10=, $pop23, $7
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push22=, 0
+; NO-SIMD128-NEXT: i32.sub $push11=, $pop22, $6
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop11
+; NO-SIMD128-NEXT: i32.const $push21=, 0
+; NO-SIMD128-NEXT: i32.sub $push12=, $pop21, $5
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push20=, 0
+; NO-SIMD128-NEXT: i32.sub $push13=, $pop20, $4
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop13
+; NO-SIMD128-NEXT: i32.const $push19=, 0
+; NO-SIMD128-NEXT: i32.sub $push14=, $pop19, $3
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push18=, 0
+; NO-SIMD128-NEXT: i32.sub $push15=, $pop18, $2
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push17=, 0
+; NO-SIMD128-NEXT: i32.sub $push16=, $pop17, $1
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: neg_v16i8:
@@ -2653,73 +2191,51 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0
; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop31, $2
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop30, $3
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push26=, $pop43, $12
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop29, $4
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop28, $5
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop27, $6
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop26, $7
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop25, $8
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push24=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop24, $9
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop23, $10
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $pop22, $11
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop21, $12
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop20, $13
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $pop19, $14
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop18, $15
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $16
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
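; Annotation (editor's note, not part of the generated checks): without
; SIMD the vector negate is fully scalarized; each lane is computed as
; 0 - x (the i32.const 0 / i32.sub pairs above) and stored through a
; constant offset folded into the store, e.g. "i32.store8 13($0)", rather
; than through a separate i32.const / i32.add address computation.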
@@ -2744,124 +2260,80 @@ define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push40=, $17, $pop0
-; NO-SIMD128-NEXT: local.tee $push39=, $17=, $pop40
-; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop39
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $5, $17
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop2
-; NO-SIMD128-NEXT: i32.shl $push3=, $3, $17
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-NEXT: i32.shl $push4=, $2, $17
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-NEXT: i32.shl $push5=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push7=, 15
-; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-NEXT: i32.shl $push6=, $16, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.shl $push9=, $15, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 13
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.shl $push12=, $14, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 12
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.shl $push15=, $13, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 11
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.shl $push18=, $12, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 10
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.shl $push21=, $11, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push25=, 9
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.shl $push24=, $10, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push28=, 7
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.shl $push27=, $8, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.shl $push30=, $7, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30
-; NO-SIMD128-NEXT: i32.const $push34=, 5
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.shl $push33=, $6, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push37=, 3
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.shl $push36=, $4, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
+; NO-SIMD128-NEXT: i32.and $push18=, $17, $pop0
+; NO-SIMD128-NEXT: local.tee $push17=, $17=, $pop18
+; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop17
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $15, $17
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $14, $17
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop3
+; NO-SIMD128-NEXT: i32.shl $push4=, $13, $17
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop4
+; NO-SIMD128-NEXT: i32.shl $push5=, $12, $17
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop5
+; NO-SIMD128-NEXT: i32.shl $push6=, $11, $17
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop6
+; NO-SIMD128-NEXT: i32.shl $push7=, $10, $17
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop7
+; NO-SIMD128-NEXT: i32.shl $push8=, $9, $17
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-NEXT: i32.shl $push9=, $8, $17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop9
+; NO-SIMD128-NEXT: i32.shl $push10=, $7, $17
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop10
+; NO-SIMD128-NEXT: i32.shl $push11=, $6, $17
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop11
+; NO-SIMD128-NEXT: i32.shl $push12=, $5, $17
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop12
+; NO-SIMD128-NEXT: i32.shl $push13=, $4, $17
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop13
+; NO-SIMD128-NEXT: i32.shl $push14=, $3, $17
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop14
+; NO-SIMD128-NEXT: i32.shl $push15=, $2, $17
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop15
+; NO-SIMD128-NEXT: i32.shl $push16=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_v16i8:
; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40
-; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $17, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push17=, $17=, $pop18
+; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16
-; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $17
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <16 x i8> undef, i8 %x, i32 0
%s = shufflevector <16 x i8> %t, <16 x i8> undef,
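; Annotation (editor's sketch; register names follow the checks above):
; every lane shifts by the same splat amount, so the expected code masks
; it once and pins it in a local for reuse, roughly:
;   %amt = and i32 %x, 255     ; the i32.and + local.tee $17 pair above
;   %r   = shl i32 %lane, %amt ; then one i32.shl / i32.store8 per lane
; The 255 mask zero-extends the i8 shift amount held in an i32 register.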
@@ -2890,75 +2362,53 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
; NO-SIMD128: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 5
-; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop0
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push53=, 5
-; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop53
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push52=, 5
-; NO-SIMD128-NEXT: i32.shl $push3=, $3, $pop52
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push51=, 5
-; NO-SIMD128-NEXT: i32.shl $push4=, $2, $pop51
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push50=, 5
-; NO-SIMD128-NEXT: i32.shl $push5=, $1, $pop50
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push7=, 15
-; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-NEXT: i32.const $push49=, 5
-; NO-SIMD128-NEXT: i32.shl $push6=, $16, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.const $push48=, 5
-; NO-SIMD128-NEXT: i32.shl $push9=, $15, $pop48
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 13
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push47=, 5
-; NO-SIMD128-NEXT: i32.shl $push12=, $14, $pop47
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 12
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push46=, 5
-; NO-SIMD128-NEXT: i32.shl $push15=, $13, $pop46
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 11
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push45=, 5
-; NO-SIMD128-NEXT: i32.shl $push18=, $12, $pop45
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 10
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.const $push44=, 5
-; NO-SIMD128-NEXT: i32.shl $push21=, $11, $pop44
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push25=, 9
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push43=, 5
-; NO-SIMD128-NEXT: i32.shl $push24=, $10, $pop43
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push28=, 7
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.const $push42=, 5
-; NO-SIMD128-NEXT: i32.shl $push27=, $8, $pop42
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.const $push41=, 5
-; NO-SIMD128-NEXT: i32.shl $push30=, $7, $pop41
-; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30
-; NO-SIMD128-NEXT: i32.const $push40=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop40
-; NO-SIMD128-NEXT: i32.const $push39=, 5
-; NO-SIMD128-NEXT: i32.shl $push33=, $6, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop33
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.const $push38=, 5
-; NO-SIMD128-NEXT: i32.shl $push35=, $4, $pop38
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push31=, 5
+; NO-SIMD128-NEXT: i32.shl $push2=, $15, $pop31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push30=, 5
+; NO-SIMD128-NEXT: i32.shl $push3=, $14, $pop30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push29=, 5
+; NO-SIMD128-NEXT: i32.shl $push4=, $13, $pop29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push28=, 5
+; NO-SIMD128-NEXT: i32.shl $push5=, $12, $pop28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push27=, 5
+; NO-SIMD128-NEXT: i32.shl $push6=, $11, $pop27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push26=, 5
+; NO-SIMD128-NEXT: i32.shl $push7=, $10, $pop26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push25=, 5
+; NO-SIMD128-NEXT: i32.shl $push8=, $9, $pop25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push24=, 5
+; NO-SIMD128-NEXT: i32.shl $push9=, $8, $pop24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop9
+; NO-SIMD128-NEXT: i32.const $push23=, 5
+; NO-SIMD128-NEXT: i32.shl $push10=, $7, $pop23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push22=, 5
+; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop11
+; NO-SIMD128-NEXT: i32.const $push21=, 5
+; NO-SIMD128-NEXT: i32.shl $push12=, $5, $pop21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push20=, 5
+; NO-SIMD128-NEXT: i32.shl $push13=, $4, $pop20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop13
+; NO-SIMD128-NEXT: i32.const $push19=, 5
+; NO-SIMD128-NEXT: i32.shl $push14=, $3, $pop19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push18=, 5
+; NO-SIMD128-NEXT: i32.shl $push15=, $2, $pop18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push17=, 5
+; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_const_v16i8:
@@ -2967,73 +2417,51 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5
; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop31
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop30
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop29
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop25
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $pop24
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $pop20
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $pop17
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%a = shl <16 x i8> %v,
<i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5,
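; Annotation (editor's note, inferred rationale): the shift amount 5 is a
; compile-time constant, but a stackified $pushN value can be consumed
; only once, so the checks rematerialize i32.const 5 ahead of every
; i32.shl instead of sharing one definition across all sixteen lanes.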
@@ -3248,91 +2676,69 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0
-; NO-SIMD128-NEXT: i32.shl $push2=, $9, $pop1
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push69=, 255
-; NO-SIMD128-NEXT: i32.and $push3=, $21, $pop69
-; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop3
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push68=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $19, $pop68
-; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop5
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push67=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop67
-; NO-SIMD128-NEXT: i32.shl $push8=, $2, $pop7
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push66=, 255
-; NO-SIMD128-NEXT: i32.and $push9=, $17, $pop66
-; NO-SIMD128-NEXT: i32.shl $push10=, $1, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push13=, 15
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push65=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $32, $pop65
-; NO-SIMD128-NEXT: i32.shl $push12=, $16, $pop11
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push17=, 14
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push64=, 255
-; NO-SIMD128-NEXT: i32.and $push15=, $31, $pop64
-; NO-SIMD128-NEXT: i32.shl $push16=, $15, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push21=, 13
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.const $push63=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $30, $pop63
-; NO-SIMD128-NEXT: i32.shl $push20=, $14, $pop19
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push25=, 12
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push62=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $29, $pop62
-; NO-SIMD128-NEXT: i32.shl $push24=, $13, $pop23
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push61=, 255
-; NO-SIMD128-NEXT: i32.and $push27=, $28, $pop61
-; NO-SIMD128-NEXT: i32.shl $push28=, $12, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push33=, 10
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.const $push60=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop60
-; NO-SIMD128-NEXT: i32.shl $push32=, $11, $pop31
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push37=, 9
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.const $push59=, 255
-; NO-SIMD128-NEXT: i32.and $push35=, $26, $pop59
-; NO-SIMD128-NEXT: i32.shl $push36=, $10, $pop35
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
-; NO-SIMD128-NEXT: i32.const $push41=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push58=, 255
-; NO-SIMD128-NEXT: i32.and $push39=, $24, $pop58
-; NO-SIMD128-NEXT: i32.shl $push40=, $8, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push45=, 6
-; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-NEXT: i32.const $push57=, 255
-; NO-SIMD128-NEXT: i32.and $push43=, $23, $pop57
-; NO-SIMD128-NEXT: i32.shl $push44=, $7, $pop43
-; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44
-; NO-SIMD128-NEXT: i32.const $push49=, 5
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $22, $pop56
-; NO-SIMD128-NEXT: i32.shl $push48=, $6, $pop47
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push53=, 3
-; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-NEXT: i32.const $push55=, 255
-; NO-SIMD128-NEXT: i32.and $push51=, $20, $pop55
-; NO-SIMD128-NEXT: i32.shl $push52=, $4, $pop51
-; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0
+; NO-SIMD128-NEXT: i32.shl $push2=, $16, $pop1
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push47=, 255
+; NO-SIMD128-NEXT: i32.and $push3=, $31, $pop47
+; NO-SIMD128-NEXT: i32.shl $push4=, $15, $pop3
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push46=, 255
+; NO-SIMD128-NEXT: i32.and $push5=, $30, $pop46
+; NO-SIMD128-NEXT: i32.shl $push6=, $14, $pop5
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push45=, 255
+; NO-SIMD128-NEXT: i32.and $push7=, $29, $pop45
+; NO-SIMD128-NEXT: i32.shl $push8=, $13, $pop7
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push44=, 255
+; NO-SIMD128-NEXT: i32.and $push9=, $28, $pop44
+; NO-SIMD128-NEXT: i32.shl $push10=, $12, $pop9
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push43=, 255
+; NO-SIMD128-NEXT: i32.and $push11=, $27, $pop43
+; NO-SIMD128-NEXT: i32.shl $push12=, $11, $pop11
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push42=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $26, $pop42
+; NO-SIMD128-NEXT: i32.shl $push14=, $10, $pop13
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push41=, 255
+; NO-SIMD128-NEXT: i32.and $push15=, $25, $pop41
+; NO-SIMD128-NEXT: i32.shl $push16=, $9, $pop15
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push40=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $24, $pop40
+; NO-SIMD128-NEXT: i32.shl $push18=, $8, $pop17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push39=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $23, $pop39
+; NO-SIMD128-NEXT: i32.shl $push20=, $7, $pop19
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push38=, 255
+; NO-SIMD128-NEXT: i32.and $push21=, $22, $pop38
+; NO-SIMD128-NEXT: i32.shl $push22=, $6, $pop21
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop22
+; NO-SIMD128-NEXT: i32.const $push37=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $21, $pop37
+; NO-SIMD128-NEXT: i32.shl $push24=, $5, $pop23
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push36=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop36
+; NO-SIMD128-NEXT: i32.shl $push26=, $4, $pop25
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop26
+; NO-SIMD128-NEXT: i32.const $push35=, 255
+; NO-SIMD128-NEXT: i32.and $push27=, $19, $pop35
+; NO-SIMD128-NEXT: i32.shl $push28=, $3, $pop27
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push34=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $18, $pop34
+; NO-SIMD128-NEXT: i32.shl $push30=, $2, $pop29
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop30
+; NO-SIMD128-NEXT: i32.const $push33=, 255
+; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop33
+; NO-SIMD128-NEXT: i32.shl $push32=, $1, $pop31
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_vec_v16i8:
@@ -3342,88 +2748,66 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop47
; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop46
; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66
-; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64
-; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63
-; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62
-; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61
-; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60
-; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59
-; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58
-; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57
-; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56
-; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55
-; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $20, $pop45
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $21, $pop44
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $22, $pop43
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $23, $pop42
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $24, $pop41
+; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $25, $pop40
+; NO-SIMD128-FAST-NEXT: i32.shl $push18=, $9, $pop17
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $27, $pop38
+; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $28, $pop37
+; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $12, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $29, $pop36
+; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $13, $pop25
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $30, $pop35
+; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $14, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $31, $pop34
+; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $15, $pop29
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $32, $pop33
+; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $16, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%a = shl <16 x i8> %v, %x
ret <16 x i8> %a
@@ -3445,79 +2829,57 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-LABEL: shr_s_v16i8:
; NO-SIMD128: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push1=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push56=, $17, $pop0
-; NO-SIMD128-NEXT: local.tee $push55=, $17=, $pop56
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop55
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend8_s $push3=, $5
+; NO-SIMD128-NEXT: i32.and $push34=, $17, $pop0
+; NO-SIMD128-NEXT: local.tee $push33=, $17=, $pop34
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop33
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
+; NO-SIMD128-NEXT: i32.extend8_s $push3=, $15
; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $17
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop4
-; NO-SIMD128-NEXT: i32.extend8_s $push5=, $3
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop4
+; NO-SIMD128-NEXT: i32.extend8_s $push5=, $14
; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $17
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop6
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $13
; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $17
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-NEXT: i32.extend8_s $push9=, $1
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop8
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $12
; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push13=, 15
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.extend8_s $push11=, $16
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop10
+; NO-SIMD128-NEXT: i32.extend8_s $push11=, $11
; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push17=, 14
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.extend8_s $push15=, $15
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop12
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $10
+; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $17
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop14
+; NO-SIMD128-NEXT: i32.extend8_s $push15=, $9
; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push21=, 13
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.extend8_s $push19=, $14
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop16
+; NO-SIMD128-NEXT: i32.extend8_s $push17=, $8
+; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop18
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $7
; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push25=, 12
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.extend8_s $push23=, $13
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop20
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $6
+; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $17
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop22
+; NO-SIMD128-NEXT: i32.extend8_s $push23=, $5
; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $12
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop24
+; NO-SIMD128-NEXT: i32.extend8_s $push25=, $4
+; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop25, $17
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop26
+; NO-SIMD128-NEXT: i32.extend8_s $push27=, $3
; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push33=, 10
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop28
+; NO-SIMD128-NEXT: i32.extend8_s $push29=, $2
+; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $17
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop30
+; NO-SIMD128-NEXT: i32.extend8_s $push31=, $1
; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop31, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push37=, 9
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.extend8_s $push35=, $10
-; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
-; NO-SIMD128-NEXT: i32.const $push41=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8
-; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop39, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push45=, 6
-; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7
-; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop43, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44
-; NO-SIMD128-NEXT: i32.const $push49=, 5
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.extend8_s $push47=, $6
-; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push53=, 3
-; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-NEXT: i32.extend8_s $push51=, $4
-; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop51, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_v16i8:
@@ -3525,9 +2887,9 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push56=, $17, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push55=, $1=, $pop56
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop55
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $17, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push33=, $1=, $pop34
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop33
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $2
; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1
@@ -3535,67 +2897,45 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $3
; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $4
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $5
; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $5
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $6
; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $6
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $8
; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $9
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $10
; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $11
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $12
; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $13
; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push27=, $14
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push28=, $pop27, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $15
; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $11
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop33, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $12
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $13
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $14
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop45, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $16
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $16
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <16 x i8> undef, i8 %x, i32 0
%s = shufflevector <16 x i8> %t, <16 x i8> undef,
@@ -3811,108 +3151,86 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128-LABEL: shr_s_vec_v16i8:
; NO-SIMD128: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push2=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push2=, $16
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0
; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop3
-; NO-SIMD128-NEXT: i32.extend8_s $push5=, $5
-; NO-SIMD128-NEXT: i32.const $push85=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop85
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
+; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15
+; NO-SIMD128-NEXT: i32.const $push63=, 255
+; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop63
; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-NEXT: i32.extend8_s $push8=, $3
-; NO-SIMD128-NEXT: i32.const $push84=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop84
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop6
+; NO-SIMD128-NEXT: i32.extend8_s $push8=, $14
+; NO-SIMD128-NEXT: i32.const $push62=, 255
+; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop62
; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop9
-; NO-SIMD128-NEXT: i32.extend8_s $push11=, $2
-; NO-SIMD128-NEXT: i32.const $push83=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop83
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop9
+; NO-SIMD128-NEXT: i32.extend8_s $push11=, $13
+; NO-SIMD128-NEXT: i32.const $push61=, 255
+; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop61
; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop12
-; NO-SIMD128-NEXT: i32.extend8_s $push14=, $1
-; NO-SIMD128-NEXT: i32.const $push82=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop82
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-NEXT: i32.extend8_s $push14=, $12
+; NO-SIMD128-NEXT: i32.const $push60=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop60
; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 15
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.extend8_s $push17=, $16
-; NO-SIMD128-NEXT: i32.const $push81=, 255
-; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop81
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop15
+; NO-SIMD128-NEXT: i32.extend8_s $push17=, $11
+; NO-SIMD128-NEXT: i32.const $push59=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop59
; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push24=, 14
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.extend8_s $push22=, $15
-; NO-SIMD128-NEXT: i32.const $push80=, 255
-; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop80
-; NO-SIMD128-NEXT: i32.shr_s $push23=, $pop22, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push29=, 13
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $14
-; NO-SIMD128-NEXT: i32.const $push79=, 255
-; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop79
-; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $pop26
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push34=, 12
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.extend8_s $push32=, $13
-; NO-SIMD128-NEXT: i32.const $push78=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop78
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop18
+; NO-SIMD128-NEXT: i32.extend8_s $push20=, $10
+; NO-SIMD128-NEXT: i32.const $push58=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop58
+; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop21
+; NO-SIMD128-NEXT: i32.extend8_s $push23=, $9
+; NO-SIMD128-NEXT: i32.const $push57=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop57
+; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop24
+; NO-SIMD128-NEXT: i32.extend8_s $push26=, $8
+; NO-SIMD128-NEXT: i32.const $push56=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop56
+; NO-SIMD128-NEXT: i32.shr_s $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop27
+; NO-SIMD128-NEXT: i32.extend8_s $push29=, $7
+; NO-SIMD128-NEXT: i32.const $push55=, 255
+; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop55
+; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop30
+; NO-SIMD128-NEXT: i32.extend8_s $push32=, $6
+; NO-SIMD128-NEXT: i32.const $push54=, 255
+; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop54
; NO-SIMD128-NEXT: i32.shr_s $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push39=, 11
-; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-NEXT: i32.extend8_s $push37=, $12
-; NO-SIMD128-NEXT: i32.const $push77=, 255
-; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop77
-; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-NEXT: i32.const $push44=, 10
-; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44
-; NO-SIMD128-NEXT: i32.extend8_s $push42=, $11
-; NO-SIMD128-NEXT: i32.const $push76=, 255
-; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop76
-; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop42, $pop41
-; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43
-; NO-SIMD128-NEXT: i32.const $push49=, 9
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.extend8_s $push47=, $10
-; NO-SIMD128-NEXT: i32.const $push75=, 255
-; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop75
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop33
+; NO-SIMD128-NEXT: i32.extend8_s $push35=, $5
+; NO-SIMD128-NEXT: i32.const $push53=, 255
+; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop53
+; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $pop34
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop36
+; NO-SIMD128-NEXT: i32.extend8_s $push38=, $4
+; NO-SIMD128-NEXT: i32.const $push52=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop52
+; NO-SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop39
+; NO-SIMD128-NEXT: i32.extend8_s $push41=, $3
+; NO-SIMD128-NEXT: i32.const $push51=, 255
+; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop51
+; NO-SIMD128-NEXT: i32.shr_s $push42=, $pop41, $pop40
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop42
+; NO-SIMD128-NEXT: i32.extend8_s $push44=, $2
+; NO-SIMD128-NEXT: i32.const $push50=, 255
+; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop50
+; NO-SIMD128-NEXT: i32.shr_s $push45=, $pop44, $pop43
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop45
+; NO-SIMD128-NEXT: i32.extend8_s $push47=, $1
+; NO-SIMD128-NEXT: i32.const $push49=, 255
+; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49
; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $pop46
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push54=, 7
-; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-NEXT: i32.extend8_s $push52=, $8
-; NO-SIMD128-NEXT: i32.const $push74=, 255
-; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop74
-; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop52, $pop51
-; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-NEXT: i32.const $push59=, 6
-; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59
-; NO-SIMD128-NEXT: i32.extend8_s $push57=, $7
-; NO-SIMD128-NEXT: i32.const $push73=, 255
-; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop73
-; NO-SIMD128-NEXT: i32.shr_s $push58=, $pop57, $pop56
-; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58
-; NO-SIMD128-NEXT: i32.const $push64=, 5
-; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64
-; NO-SIMD128-NEXT: i32.extend8_s $push62=, $6
-; NO-SIMD128-NEXT: i32.const $push72=, 255
-; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop72
-; NO-SIMD128-NEXT: i32.shr_s $push63=, $pop62, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63
-; NO-SIMD128-NEXT: i32.const $push69=, 3
-; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69
-; NO-SIMD128-NEXT: i32.extend8_s $push67=, $4
-; NO-SIMD128-NEXT: i32.const $push71=, 255
-; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71
-; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop67, $pop66
-; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop48
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8:
@@ -3924,102 +3242,80 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop85
+; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop63
; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop84
+; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop62
; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $20, $pop83
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop82
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $22, $pop81
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop80
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $4
+; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop61
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop60
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop59
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop58
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop57
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $9
+; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop56
; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $24, $pop79
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $9
-; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop78
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push39=, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push38=, $26, $pop77
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $11
-; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push43=, $27, $pop76
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop55
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $11
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop54
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $12
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop53
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push38=, $13
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop52
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $14
+; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop51
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $15
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop50
; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop45
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $12
-; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push48=, $28, $pop75
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push54=, $13
-; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push53=, $29, $pop74
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push59=, $14
-; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push58=, $30, $pop73
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop60
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push64=, $15
-; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push63=, $31, $pop72
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop65
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $16
-; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push68=, $32, $pop71
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop70
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push47=, $16
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48
; NO-SIMD128-FAST-NEXT: return
%a = ashr <16 x i8> %v, %x
ret <16 x i8> %a
@@ -4042,94 +3338,72 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0
-; NO-SIMD128-NEXT: i32.const $push72=, 255
-; NO-SIMD128-NEXT: i32.and $push71=, $17, $pop72
-; NO-SIMD128-NEXT: local.tee $push70=, $17=, $pop71
-; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop70
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push69=, 255
-; NO-SIMD128-NEXT: i32.and $push3=, $5, $pop69
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.const $push50=, 255
+; NO-SIMD128-NEXT: i32.and $push49=, $17, $pop50
+; NO-SIMD128-NEXT: local.tee $push48=, $17=, $pop49
+; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop48
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push47=, 255
+; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop47
; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $17
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push68=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop68
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push46=, 255
+; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop46
; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $17
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push67=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $2, $pop67
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push45=, 255
+; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop45
; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $17
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push66=, 255
-; NO-SIMD128-NEXT: i32.and $push9=, $1, $pop66
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push44=, 255
+; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop44
; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push13=, 15
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push65=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $16, $pop65
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push43=, 255
+; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop43
; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push17=, 14
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push64=, 255
-; NO-SIMD128-NEXT: i32.and $push15=, $15, $pop64
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push42=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop42
+; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $17
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push41=, 255
+; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop41
; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push21=, 13
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.const $push63=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $14, $pop63
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push40=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $8, $pop40
+; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push39=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop39
; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push25=, 12
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push62=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $13, $pop62
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push38=, 255
+; NO-SIMD128-NEXT: i32.and $push21=, $6, $pop38
+; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $17
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop22
+; NO-SIMD128-NEXT: i32.const $push37=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $5, $pop37
; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push61=, 255
-; NO-SIMD128-NEXT: i32.and $push27=, $12, $pop61
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push36=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $4, $pop36
+; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $17
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop26
+; NO-SIMD128-NEXT: i32.const $push35=, 255
+; NO-SIMD128-NEXT: i32.and $push27=, $3, $pop35
; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push33=, 10
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.const $push60=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $11, $pop60
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push34=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $2, $pop34
+; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $17
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop30
+; NO-SIMD128-NEXT: i32.const $push33=, 255
+; NO-SIMD128-NEXT: i32.and $push31=, $1, $pop33
; NO-SIMD128-NEXT: i32.shr_u $push32=, $pop31, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push37=, 9
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.const $push59=, 255
-; NO-SIMD128-NEXT: i32.and $push35=, $10, $pop59
-; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
-; NO-SIMD128-NEXT: i32.const $push41=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push58=, 255
-; NO-SIMD128-NEXT: i32.and $push39=, $8, $pop58
-; NO-SIMD128-NEXT: i32.shr_u $push40=, $pop39, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push45=, 6
-; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-NEXT: i32.const $push57=, 255
-; NO-SIMD128-NEXT: i32.and $push43=, $7, $pop57
-; NO-SIMD128-NEXT: i32.shr_u $push44=, $pop43, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44
-; NO-SIMD128-NEXT: i32.const $push49=, 5
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $6, $pop56
-; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push53=, 3
-; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-NEXT: i32.const $push55=, 255
-; NO-SIMD128-NEXT: i32.and $push51=, $4, $pop55
-; NO-SIMD128-NEXT: i32.shr_u $push52=, $pop51, $17
-; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_v16i8:
@@ -4137,93 +3411,71 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72
-; NO-SIMD128-FAST-NEXT: local.tee $push70=, $1=, $pop71
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push49=, $17, $pop50
+; NO-SIMD128-FAST-NEXT: local.tee $push48=, $1=, $pop49
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop48
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop47
; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop46
; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop45
; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop44
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop43
; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop42
; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop41
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $9, $pop40
; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $pop39
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $11, $pop38
; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $12, $pop37
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $13, $pop36
; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $14, $pop35
; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop34
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $1
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $16, $pop33
; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $1
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <16 x i8> undef, i8 %x, i32 0
%s = shufflevector <16 x i8> %t, <16 x i8> undef,
@@ -4440,123 +3692,101 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop0
-; NO-SIMD128-NEXT: i32.const $push101=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop101
-; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push100=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $5, $pop100
-; NO-SIMD128-NEXT: i32.const $push99=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop99
-; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push98=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $3, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop97
-; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push96=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $2, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop95
-; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push94=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $1, $pop94
-; NO-SIMD128-NEXT: i32.const $push93=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop93
-; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 15
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push92=, 255
-; NO-SIMD128-NEXT: i32.and $push17=, $16, $pop92
-; NO-SIMD128-NEXT: i32.const $push91=, 255
-; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop91
-; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push24=, 14
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.const $push90=, 255
-; NO-SIMD128-NEXT: i32.and $push22=, $15, $pop90
-; NO-SIMD128-NEXT: i32.const $push89=, 255
-; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop89
-; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push29=, 13
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push88=, 255
-; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop88
-; NO-SIMD128-NEXT: i32.const $push87=, 255
-; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop87
-; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $pop26
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push34=, 12
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.const $push86=, 255
-; NO-SIMD128-NEXT: i32.and $push32=, $13, $pop86
-; NO-SIMD128-NEXT: i32.const $push85=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop85
-; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push39=, 11
-; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-NEXT: i32.const $push84=, 255
-; NO-SIMD128-NEXT: i32.and $push37=, $12, $pop84
-; NO-SIMD128-NEXT: i32.const $push83=, 255
-; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop83
-; NO-SIMD128-NEXT: i32.shr_u $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-NEXT: i32.const $push44=, 10
-; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44
-; NO-SIMD128-NEXT: i32.const $push82=, 255
-; NO-SIMD128-NEXT: i32.and $push42=, $11, $pop82
-; NO-SIMD128-NEXT: i32.const $push81=, 255
-; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop81
-; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop41
-; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43
-; NO-SIMD128-NEXT: i32.const $push49=, 9
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.const $push80=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $10, $pop80
+; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
; NO-SIMD128-NEXT: i32.const $push79=, 255
-; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop79
-; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push54=, 7
-; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop79
+; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
; NO-SIMD128-NEXT: i32.const $push78=, 255
-; NO-SIMD128-NEXT: i32.and $push52=, $8, $pop78
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop78
; NO-SIMD128-NEXT: i32.const $push77=, 255
-; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop77
-; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop51
-; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-NEXT: i32.const $push59=, 6
-; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59
+; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop77
+; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop6
; NO-SIMD128-NEXT: i32.const $push76=, 255
-; NO-SIMD128-NEXT: i32.and $push57=, $7, $pop76
+; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop76
; NO-SIMD128-NEXT: i32.const $push75=, 255
-; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop75
-; NO-SIMD128-NEXT: i32.shr_u $push58=, $pop57, $pop56
-; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58
-; NO-SIMD128-NEXT: i32.const $push64=, 5
-; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64
+; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop75
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop9
; NO-SIMD128-NEXT: i32.const $push74=, 255
-; NO-SIMD128-NEXT: i32.and $push62=, $6, $pop74
+; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop74
; NO-SIMD128-NEXT: i32.const $push73=, 255
-; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop73
-; NO-SIMD128-NEXT: i32.shr_u $push63=, $pop62, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63
-; NO-SIMD128-NEXT: i32.const $push69=, 3
-; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69
+; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop73
+; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop12
; NO-SIMD128-NEXT: i32.const $push72=, 255
-; NO-SIMD128-NEXT: i32.and $push67=, $4, $pop72
+; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop72
; NO-SIMD128-NEXT: i32.const $push71=, 255
-; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71
-; NO-SIMD128-NEXT: i32.shr_u $push68=, $pop67, $pop66
-; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68
+; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop71
+; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push70=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop69
+; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push68=, 255
+; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop67
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push66=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop66
+; NO-SIMD128-NEXT: i32.const $push65=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop65
+; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push64=, 255
+; NO-SIMD128-NEXT: i32.and $push26=, $8, $pop64
+; NO-SIMD128-NEXT: i32.const $push63=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop63
+; NO-SIMD128-NEXT: i32.shr_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop27
+; NO-SIMD128-NEXT: i32.const $push62=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $7, $pop62
+; NO-SIMD128-NEXT: i32.const $push61=, 255
+; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop61
+; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop30
+; NO-SIMD128-NEXT: i32.const $push60=, 255
+; NO-SIMD128-NEXT: i32.and $push32=, $6, $pop60
+; NO-SIMD128-NEXT: i32.const $push59=, 255
+; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop59
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop33
+; NO-SIMD128-NEXT: i32.const $push58=, 255
+; NO-SIMD128-NEXT: i32.and $push35=, $5, $pop58
+; NO-SIMD128-NEXT: i32.const $push57=, 255
+; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop57
+; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop34
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push56=, 255
+; NO-SIMD128-NEXT: i32.and $push38=, $4, $pop56
+; NO-SIMD128-NEXT: i32.const $push55=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop55
+; NO-SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop39
+; NO-SIMD128-NEXT: i32.const $push54=, 255
+; NO-SIMD128-NEXT: i32.and $push41=, $3, $pop54
+; NO-SIMD128-NEXT: i32.const $push53=, 255
+; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop53
+; NO-SIMD128-NEXT: i32.shr_u $push42=, $pop41, $pop40
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop42
+; NO-SIMD128-NEXT: i32.const $push52=, 255
+; NO-SIMD128-NEXT: i32.and $push44=, $2, $pop52
+; NO-SIMD128-NEXT: i32.const $push51=, 255
+; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop51
+; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop43
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop45
+; NO-SIMD128-NEXT: i32.const $push50=, 255
+; NO-SIMD128-NEXT: i32.and $push47=, $1, $pop50
+; NO-SIMD128-NEXT: i32.const $push49=, 255
+; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49
+; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop48
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8:
@@ -4564,122 +3794,100 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop79
; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100
-; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop95
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88
-; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84
-; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82
-; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54
; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop78
; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop77
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop76
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop76
; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop60), $pop58
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop75
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop74
; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63
-; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop73
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12
; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop72
; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop71
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop70
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop69
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop67
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop66
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop65
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop64
+; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop63
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $10, $pop62
+; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop61
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $11, $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop59
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33
+; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop58
+; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop57
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push38=, $13, $pop56
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop55
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push41=, $14, $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop53
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push42=, $pop41, $pop40
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push44=, $15, $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop51
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push47=, $16, $pop50
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48
; NO-SIMD128-FAST-NEXT: return
%a = lshr <16 x i8> %v, %x
ret <16 x i8> %a
@@ -4701,60 +3909,38 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: and_v16i8:
; NO-SIMD128: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.and $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.and $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.and $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.and $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.and $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.and $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.and $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.and $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.and $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.and $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.and $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.and $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.and $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.and $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.and $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.and $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.and $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.and $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.and $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.and $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.and $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.and $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.and $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.and $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.and $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.and $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.and $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.and $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.and $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.and $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.and $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: and_v16i8:
@@ -4766,54 +3952,32 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = and <16 x i8> %x, %y
ret <16 x i8> %a
@@ -4835,60 +3999,38 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: or_v16i8:
; NO-SIMD128: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.or $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.or $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.or $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.or $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.or $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.or $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.or $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.or $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.or $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.or $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.or $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.or $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.or $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.or $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.or $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.or $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.or $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.or $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.or $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.or $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.or $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.or $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.or $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.or $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.or $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.or $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.or $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.or $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.or $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.or $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.or $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.or $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: or_v16i8:
@@ -4900,54 +4042,32 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.or $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.or $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.or $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.or $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.or $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.or $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.or $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.or $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = or <16 x i8> %x, %y
ret <16 x i8> %a
@@ -4969,60 +4089,38 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: xor_v16i8:
; NO-SIMD128: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.xor $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.xor $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.xor $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.xor $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.xor $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.xor $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.xor $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.xor $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.xor $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.xor $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.xor $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.xor $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.xor $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.xor $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.xor $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.xor $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.xor $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.xor $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.xor $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.xor $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.xor $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.xor $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.xor $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.xor $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.xor $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.xor $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.xor $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.xor $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.xor $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: xor_v16i8:
@@ -5034,54 +4132,32 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%a = xor <16 x i8> %x, %y
ret <16 x i8> %a
@@ -5104,75 +4180,53 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) {
; NO-SIMD128: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $9, $pop0
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push53=, -1
-; NO-SIMD128-NEXT: i32.xor $push2=, $5, $pop53
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push52=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop52
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push51=, -1
-; NO-SIMD128-NEXT: i32.xor $push4=, $2, $pop51
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push50=, -1
-; NO-SIMD128-NEXT: i32.xor $push5=, $1, $pop50
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push7=, 15
-; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-NEXT: i32.const $push49=, -1
-; NO-SIMD128-NEXT: i32.xor $push6=, $16, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.const $push48=, -1
-; NO-SIMD128-NEXT: i32.xor $push9=, $15, $pop48
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 13
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push47=, -1
-; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop47
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 12
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push46=, -1
-; NO-SIMD128-NEXT: i32.xor $push15=, $13, $pop46
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push19=, 11
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push45=, -1
-; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop45
-; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 10
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.const $push44=, -1
-; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop44
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push25=, 9
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push43=, -1
-; NO-SIMD128-NEXT: i32.xor $push24=, $10, $pop43
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push28=, 7
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.const $push42=, -1
-; NO-SIMD128-NEXT: i32.xor $push27=, $8, $pop42
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.const $push41=, -1
-; NO-SIMD128-NEXT: i32.xor $push30=, $7, $pop41
-; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30
-; NO-SIMD128-NEXT: i32.const $push34=, 5
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.const $push40=, -1
-; NO-SIMD128-NEXT: i32.xor $push33=, $6, $pop40
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push37=, 3
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.const $push39=, -1
-; NO-SIMD128-NEXT: i32.xor $push36=, $4, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
+; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push31=, -1
+; NO-SIMD128-NEXT: i32.xor $push2=, $15, $pop31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push30=, -1
+; NO-SIMD128-NEXT: i32.xor $push3=, $14, $pop30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push29=, -1
+; NO-SIMD128-NEXT: i32.xor $push4=, $13, $pop29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push28=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $12, $pop28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push27=, -1
+; NO-SIMD128-NEXT: i32.xor $push6=, $11, $pop27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push26=, -1
+; NO-SIMD128-NEXT: i32.xor $push7=, $10, $pop26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push25=, -1
+; NO-SIMD128-NEXT: i32.xor $push8=, $9, $pop25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push24=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $8, $pop24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop9
+; NO-SIMD128-NEXT: i32.const $push23=, -1
+; NO-SIMD128-NEXT: i32.xor $push10=, $7, $pop23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push22=, -1
+; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop11
+; NO-SIMD128-NEXT: i32.const $push21=, -1
+; NO-SIMD128-NEXT: i32.xor $push12=, $5, $pop21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push20=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $4, $pop20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop13
+; NO-SIMD128-NEXT: i32.const $push19=, -1
+; NO-SIMD128-NEXT: i32.xor $push14=, $3, $pop19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push18=, -1
+; NO-SIMD128-NEXT: i32.xor $push15=, $2, $pop18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push17=, -1
+; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: not_v16i8:
@@ -5181,73 +4235,51 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop31
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop30
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $15, $pop40
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop29
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop25
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push24=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $9, $pop24
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $10, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $11, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $12, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $13, $pop20
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $14, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $15, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $16, $pop17
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@@ -5274,91 +4306,69 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $25, $pop0
-; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop1
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push69=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $21, $pop69
-; NO-SIMD128-NEXT: i32.and $push4=, $5, $pop3
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push68=, -1
-; NO-SIMD128-NEXT: i32.xor $push5=, $19, $pop68
-; NO-SIMD128-NEXT: i32.and $push6=, $3, $pop5
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push67=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $18, $pop67
-; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop7
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push66=, -1
-; NO-SIMD128-NEXT: i32.xor $push9=, $17, $pop66
-; NO-SIMD128-NEXT: i32.and $push10=, $1, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push13=, 15
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.const $push65=, -1
-; NO-SIMD128-NEXT: i32.xor $push11=, $32, $pop65
-; NO-SIMD128-NEXT: i32.and $push12=, $16, $pop11
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push17=, 14
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push64=, -1
-; NO-SIMD128-NEXT: i32.xor $push15=, $31, $pop64
-; NO-SIMD128-NEXT: i32.and $push16=, $15, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push21=, 13
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.const $push63=, -1
-; NO-SIMD128-NEXT: i32.xor $push19=, $30, $pop63
-; NO-SIMD128-NEXT: i32.and $push20=, $14, $pop19
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push25=, 12
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.const $push62=, -1
-; NO-SIMD128-NEXT: i32.xor $push23=, $29, $pop62
-; NO-SIMD128-NEXT: i32.and $push24=, $13, $pop23
-; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push61=, -1
-; NO-SIMD128-NEXT: i32.xor $push27=, $28, $pop61
-; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push33=, 10
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.const $push60=, -1
-; NO-SIMD128-NEXT: i32.xor $push31=, $27, $pop60
-; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop31
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push37=, 9
-; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37
-; NO-SIMD128-NEXT: i32.const $push59=, -1
-; NO-SIMD128-NEXT: i32.xor $push35=, $26, $pop59
-; NO-SIMD128-NEXT: i32.and $push36=, $10, $pop35
-; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36
-; NO-SIMD128-NEXT: i32.const $push41=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push58=, -1
-; NO-SIMD128-NEXT: i32.xor $push39=, $24, $pop58
-; NO-SIMD128-NEXT: i32.and $push40=, $8, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push45=, 6
-; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-NEXT: i32.const $push57=, -1
-; NO-SIMD128-NEXT: i32.xor $push43=, $23, $pop57
-; NO-SIMD128-NEXT: i32.and $push44=, $7, $pop43
-; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44
-; NO-SIMD128-NEXT: i32.const $push49=, 5
-; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, -1
-; NO-SIMD128-NEXT: i32.xor $push47=, $22, $pop56
-; NO-SIMD128-NEXT: i32.and $push48=, $6, $pop47
-; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-NEXT: i32.const $push53=, 3
-; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53
-; NO-SIMD128-NEXT: i32.const $push55=, -1
-; NO-SIMD128-NEXT: i32.xor $push51=, $20, $pop55
-; NO-SIMD128-NEXT: i32.and $push52=, $4, $pop51
-; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52
+; NO-SIMD128-NEXT: i32.xor $push1=, $32, $pop0
+; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop1
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push47=, -1
+; NO-SIMD128-NEXT: i32.xor $push3=, $31, $pop47
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop3
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push46=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $30, $pop46
+; NO-SIMD128-NEXT: i32.and $push6=, $14, $pop5
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push45=, -1
+; NO-SIMD128-NEXT: i32.xor $push7=, $29, $pop45
+; NO-SIMD128-NEXT: i32.and $push8=, $13, $pop7
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push44=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $28, $pop44
+; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop9
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push43=, -1
+; NO-SIMD128-NEXT: i32.xor $push11=, $27, $pop43
+; NO-SIMD128-NEXT: i32.and $push12=, $11, $pop11
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push42=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $26, $pop42
+; NO-SIMD128-NEXT: i32.and $push14=, $10, $pop13
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push41=, -1
+; NO-SIMD128-NEXT: i32.xor $push15=, $25, $pop41
+; NO-SIMD128-NEXT: i32.and $push16=, $9, $pop15
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push40=, -1
+; NO-SIMD128-NEXT: i32.xor $push17=, $24, $pop40
+; NO-SIMD128-NEXT: i32.and $push18=, $8, $pop17
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push39=, -1
+; NO-SIMD128-NEXT: i32.xor $push19=, $23, $pop39
+; NO-SIMD128-NEXT: i32.and $push20=, $7, $pop19
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push38=, -1
+; NO-SIMD128-NEXT: i32.xor $push21=, $22, $pop38
+; NO-SIMD128-NEXT: i32.and $push22=, $6, $pop21
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop22
+; NO-SIMD128-NEXT: i32.const $push37=, -1
+; NO-SIMD128-NEXT: i32.xor $push23=, $21, $pop37
+; NO-SIMD128-NEXT: i32.and $push24=, $5, $pop23
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push36=, -1
+; NO-SIMD128-NEXT: i32.xor $push25=, $20, $pop36
+; NO-SIMD128-NEXT: i32.and $push26=, $4, $pop25
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop26
+; NO-SIMD128-NEXT: i32.const $push35=, -1
+; NO-SIMD128-NEXT: i32.xor $push27=, $19, $pop35
+; NO-SIMD128-NEXT: i32.and $push28=, $3, $pop27
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push34=, -1
+; NO-SIMD128-NEXT: i32.xor $push29=, $18, $pop34
+; NO-SIMD128-NEXT: i32.and $push30=, $2, $pop29
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop30
+; NO-SIMD128-NEXT: i32.const $push33=, -1
+; NO-SIMD128-NEXT: i32.xor $push31=, $17, $pop33
+; NO-SIMD128-NEXT: i32.and $push32=, $1, $pop31
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: andnot_v16i8:
@@ -5368,88 +4378,66 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $17, $pop0
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop69
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop47
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop68
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop46
; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $pop67
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $21, $pop66
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $pop65
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push64=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $23, $pop64
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $24, $pop63
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $25, $pop62
-; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $26, $pop61
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $10, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $27, $pop60
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $11, $pop33
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $28, $pop59
-; NO-SIMD128-FAST-NEXT: i32.and $push38=, $12, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $29, $pop58
-; NO-SIMD128-FAST-NEXT: i32.and $push42=, $13, $pop41
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $30, $pop57
-; NO-SIMD128-FAST-NEXT: i32.and $push46=, $14, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $31, $pop56
-; NO-SIMD128-FAST-NEXT: i32.and $push50=, $15, $pop49
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $32, $pop55
-; NO-SIMD128-FAST-NEXT: i32.and $push54=, $16, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $20, $pop45
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $21, $pop44
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $22, $pop43
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $23, $pop42
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $24, $pop41
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $25, $pop40
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop17
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $10, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $27, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $28, $pop37
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $12, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $29, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $13, $pop25
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $30, $pop35
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $14, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $31, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $15, $pop29
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $32, $pop33
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $16, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%inv_y = xor <16 x i8> %y,
<i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
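(The IR tail above is truncated by the diff context; for reference, a minimal sketch of the and-not pattern these scalarized sequences implement is below — function name hypothetical, vector constant completed on the assumption that it is the same all-ones mask shown above. The change visible in the checks is that each i32.store8 now folds its lane offset into the store's offset immediate, e.g. "i32.store8 15($0)", instead of materializing the address with an i32.const/i32.add pair, saving two instructions per lane.)

; Minimal sketch: x & ~y over <16 x i8>, scalarized into sixteen
; byte stores through the sret pointer when SIMD128 is disabled.
define <16 x i8> @andnot_sketch(<16 x i8> %x, <16 x i8> %y) {
  %inv_y = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %r = and <16 x i8> %x, %inv_y
  ret <16 x i8> %r
}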
@@ -5477,124 +4465,102 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-LABEL: bitselect_v16i8:
; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 15
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.and $push0=, $16, $32
; NO-SIMD128-NEXT: i32.const $push1=, -1
; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48
; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3
-; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.and $push7=, $15, $31
-; NO-SIMD128-NEXT: i32.const $push101=, -1
-; NO-SIMD128-NEXT: i32.xor $push8=, $15, $pop101
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $47
-; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 13
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.and $push13=, $14, $30
-; NO-SIMD128-NEXT: i32.const $push100=, -1
-; NO-SIMD128-NEXT: i32.xor $push14=, $14, $pop100
-; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $46
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $31
+; NO-SIMD128-NEXT: i32.const $push79=, -1
+; NO-SIMD128-NEXT: i32.xor $push6=, $15, $pop79
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $47
+; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
+; NO-SIMD128-NEXT: i32.and $push9=, $14, $30
+; NO-SIMD128-NEXT: i32.const $push78=, -1
+; NO-SIMD128-NEXT: i32.xor $push10=, $14, $pop78
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $46
+; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
+; NO-SIMD128-NEXT: i32.and $push13=, $13, $29
+; NO-SIMD128-NEXT: i32.const $push77=, -1
+; NO-SIMD128-NEXT: i32.xor $push14=, $13, $pop77
+; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $45
; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push23=, 12
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.and $push19=, $13, $29
-; NO-SIMD128-NEXT: i32.const $push99=, -1
-; NO-SIMD128-NEXT: i32.xor $push20=, $13, $pop99
-; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $45
-; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.and $push25=, $12, $28
-; NO-SIMD128-NEXT: i32.const $push98=, -1
-; NO-SIMD128-NEXT: i32.xor $push26=, $12, $pop98
-; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $44
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
+; NO-SIMD128-NEXT: i32.and $push17=, $12, $28
+; NO-SIMD128-NEXT: i32.const $push76=, -1
+; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop76
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $44
+; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
+; NO-SIMD128-NEXT: i32.and $push21=, $11, $27
+; NO-SIMD128-NEXT: i32.const $push75=, -1
+; NO-SIMD128-NEXT: i32.xor $push22=, $11, $pop75
+; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $43
+; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
+; NO-SIMD128-NEXT: i32.and $push25=, $10, $26
+; NO-SIMD128-NEXT: i32.const $push74=, -1
+; NO-SIMD128-NEXT: i32.xor $push26=, $10, $pop74
+; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $42
; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push35=, 10
-; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-NEXT: i32.and $push31=, $11, $27
-; NO-SIMD128-NEXT: i32.const $push97=, -1
-; NO-SIMD128-NEXT: i32.xor $push32=, $11, $pop97
-; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $43
-; NO-SIMD128-NEXT: i32.or $push34=, $pop31, $pop33
-; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34
-; NO-SIMD128-NEXT: i32.const $push41=, 9
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.and $push37=, $10, $26
-; NO-SIMD128-NEXT: i32.const $push96=, -1
-; NO-SIMD128-NEXT: i32.xor $push38=, $10, $pop96
-; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $42
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
+; NO-SIMD128-NEXT: i32.and $push29=, $9, $25
+; NO-SIMD128-NEXT: i32.const $push73=, -1
+; NO-SIMD128-NEXT: i32.xor $push30=, $9, $pop73
+; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $41
+; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
+; NO-SIMD128-NEXT: i32.and $push33=, $8, $24
+; NO-SIMD128-NEXT: i32.const $push72=, -1
+; NO-SIMD128-NEXT: i32.xor $push34=, $8, $pop72
+; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $40
+; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
+; NO-SIMD128-NEXT: i32.and $push37=, $7, $23
+; NO-SIMD128-NEXT: i32.const $push71=, -1
+; NO-SIMD128-NEXT: i32.xor $push38=, $7, $pop71
+; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $39
; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.and $push43=, $9, $25
-; NO-SIMD128-NEXT: i32.const $push95=, -1
-; NO-SIMD128-NEXT: i32.xor $push44=, $9, $pop95
-; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $41
-; NO-SIMD128-NEXT: i32.or $push46=, $pop43, $pop45
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop46
-; NO-SIMD128-NEXT: i32.const $push51=, 7
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.and $push47=, $8, $24
-; NO-SIMD128-NEXT: i32.const $push94=, -1
-; NO-SIMD128-NEXT: i32.xor $push48=, $8, $pop94
-; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $40
-; NO-SIMD128-NEXT: i32.or $push50=, $pop47, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.const $push57=, 6
-; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57
-; NO-SIMD128-NEXT: i32.and $push53=, $7, $23
-; NO-SIMD128-NEXT: i32.const $push93=, -1
-; NO-SIMD128-NEXT: i32.xor $push54=, $7, $pop93
-; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
+; NO-SIMD128-NEXT: i32.and $push41=, $6, $22
+; NO-SIMD128-NEXT: i32.const $push70=, -1
+; NO-SIMD128-NEXT: i32.xor $push42=, $6, $pop70
+; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $38
+; NO-SIMD128-NEXT: i32.or $push44=, $pop41, $pop43
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
+; NO-SIMD128-NEXT: i32.and $push45=, $5, $21
+; NO-SIMD128-NEXT: i32.const $push69=, -1
+; NO-SIMD128-NEXT: i32.xor $push46=, $5, $pop69
+; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $37
+; NO-SIMD128-NEXT: i32.or $push48=, $pop45, $pop47
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
+; NO-SIMD128-NEXT: i32.and $push49=, $4, $20
+; NO-SIMD128-NEXT: i32.const $push68=, -1
+; NO-SIMD128-NEXT: i32.xor $push50=, $4, $pop68
+; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $36
+; NO-SIMD128-NEXT: i32.or $push52=, $pop49, $pop51
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
+; NO-SIMD128-NEXT: i32.and $push53=, $3, $19
+; NO-SIMD128-NEXT: i32.const $push67=, -1
+; NO-SIMD128-NEXT: i32.xor $push54=, $3, $pop67
+; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $35
; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55
-; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56
-; NO-SIMD128-NEXT: i32.const $push63=, 5
-; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-NEXT: i32.and $push59=, $6, $22
-; NO-SIMD128-NEXT: i32.const $push92=, -1
-; NO-SIMD128-NEXT: i32.xor $push60=, $6, $pop92
-; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $38
-; NO-SIMD128-NEXT: i32.or $push62=, $pop59, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-NEXT: i32.and $push65=, $5, $21
-; NO-SIMD128-NEXT: i32.const $push91=, -1
-; NO-SIMD128-NEXT: i32.xor $push66=, $5, $pop91
-; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $37
-; NO-SIMD128-NEXT: i32.or $push68=, $pop65, $pop67
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop68
-; NO-SIMD128-NEXT: i32.const $push73=, 3
-; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-NEXT: i32.and $push69=, $4, $20
-; NO-SIMD128-NEXT: i32.const $push90=, -1
-; NO-SIMD128-NEXT: i32.xor $push70=, $4, $pop90
-; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $36
-; NO-SIMD128-NEXT: i32.or $push72=, $pop69, $pop71
-; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-NEXT: i32.and $push75=, $3, $19
-; NO-SIMD128-NEXT: i32.const $push89=, -1
-; NO-SIMD128-NEXT: i32.xor $push76=, $3, $pop89
-; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $35
-; NO-SIMD128-NEXT: i32.or $push78=, $pop75, $pop77
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop78
-; NO-SIMD128-NEXT: i32.and $push79=, $2, $18
-; NO-SIMD128-NEXT: i32.const $push88=, -1
-; NO-SIMD128-NEXT: i32.xor $push80=, $2, $pop88
-; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $34
-; NO-SIMD128-NEXT: i32.or $push82=, $pop79, $pop81
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop82
-; NO-SIMD128-NEXT: i32.and $push83=, $1, $17
-; NO-SIMD128-NEXT: i32.const $push87=, -1
-; NO-SIMD128-NEXT: i32.xor $push84=, $1, $pop87
-; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $33
-; NO-SIMD128-NEXT: i32.or $push86=, $pop83, $pop85
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop86
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
+; NO-SIMD128-NEXT: i32.and $push57=, $2, $18
+; NO-SIMD128-NEXT: i32.const $push66=, -1
+; NO-SIMD128-NEXT: i32.xor $push58=, $2, $pop66
+; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $34
+; NO-SIMD128-NEXT: i32.or $push60=, $pop57, $pop59
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
+; NO-SIMD128-NEXT: i32.and $push61=, $1, $17
+; NO-SIMD128-NEXT: i32.const $push65=, -1
+; NO-SIMD128-NEXT: i32.xor $push62=, $1, $pop65
+; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $33
+; NO-SIMD128-NEXT: i32.or $push64=, $pop61, $pop63
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_v16i8:
@@ -5607,117 +4573,95 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop79
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop78
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop77
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37
-; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38
-; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96
-; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop76
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $37
+; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop75
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $38
+; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop74
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $39
+; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop73
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $40
; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40
-; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-FAST-NEXT: i32.and $push41=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94
-; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32
+; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $9, $pop72
+; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $41
+; NO-SIMD128-FAST-NEXT: i32.or $push36=, $pop33, $pop35
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $10, $pop71
+; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $42
+; NO-SIMD128-FAST-NEXT: i32.or $push40=, $pop37, $pop39
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40
+; NO-SIMD128-FAST-NEXT: i32.and $push41=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $11, $pop70
+; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $43
; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93
-; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44
+; NO-SIMD128-FAST-NEXT: i32.and $push45=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop69
+; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $44
; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55
-; NO-SIMD128-FAST-NEXT: i32.and $push51=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92
-; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43
-; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61
-; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91
-; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48
+; NO-SIMD128-FAST-NEXT: i32.and $push49=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $13, $pop68
+; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $45
+; NO-SIMD128-FAST-NEXT: i32.or $push52=, $pop49, $pop51
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52
+; NO-SIMD128-FAST-NEXT: i32.and $push53=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $14, $pop67
+; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $46
+; NO-SIMD128-FAST-NEXT: i32.or $push56=, $pop53, $pop55
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56
+; NO-SIMD128-FAST-NEXT: i32.and $push57=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $15, $pop66
+; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $47
; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67
-; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90
-; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45
-; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66
-; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89
-; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46
-; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79
-; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88
-; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47
-; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85
-; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87
-; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48
-; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60
+; NO-SIMD128-FAST-NEXT: i32.and $push61=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $16, $pop65
+; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $48
+; NO-SIMD128-FAST-NEXT: i32.or $push64=, $pop61, $pop63
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64
; NO-SIMD128-FAST-NEXT: return
%masked_v1 = and <16 x i8> %c, %v1
%inv_mask = xor <16 x i8> %c,
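(The bitselect checks above reflect the same offset-folding change; the function body, cut off by the context window, computes (c & v1) | (~c & v2) per the %masked_v1/%inv_mask lines visible above. A minimal sketch — name hypothetical, inversion constant assumed to be the usual all-ones mask as in the andnot test:)

; Minimal sketch: classic bitselect, lowered per byte as
; and / xor(-1) / and / or followed by an offset-folded i32.store8.
define <16 x i8> @bitselect_sketch(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  %masked_v1 = and <16 x i8> %c, %v1
  %inv_mask = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
                                 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %masked_v2 = and <16 x i8> %inv_mask, %v2
  %r = or <16 x i8> %masked_v1, %masked_v2
  ret <16 x i8> %r
}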
@@ -5746,92 +4690,70 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2
; NO-SIMD128-LABEL: bitselect_xor_v16i8:
; NO-SIMD128: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push3=, 15
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48
-; NO-SIMD128-NEXT: i32.store8 0($pop4), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 14
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.xor $push5=, $31, $47
-; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $15
-; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $47
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push13=, 13
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46
-; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $14
-; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46
-; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, 12
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.xor $push15=, $29, $45
-; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $13
-; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $45
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push23=, 11
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.xor $push20=, $28, $44
-; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $12
-; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $44
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push28=, 10
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.xor $push25=, $27, $43
-; NO-SIMD128-NEXT: i32.and $push26=, $pop25, $11
-; NO-SIMD128-NEXT: i32.xor $push27=, $pop26, $43
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push33=, 9
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.xor $push30=, $26, $42
-; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $10
-; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $42
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.xor $push35=, $25, $41
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $9
-; NO-SIMD128-NEXT: i32.xor $push37=, $pop36, $41
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop37
-; NO-SIMD128-NEXT: i32.const $push41=, 7
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.xor $push38=, $24, $40
-; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $8
-; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $40
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push46=, 6
-; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46
-; NO-SIMD128-NEXT: i32.xor $push43=, $23, $39
-; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $7
-; NO-SIMD128-NEXT: i32.xor $push45=, $pop44, $39
-; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45
-; NO-SIMD128-NEXT: i32.const $push51=, 5
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.xor $push48=, $22, $38
-; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $6
-; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $38
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.xor $push53=, $21, $37
-; NO-SIMD128-NEXT: i32.and $push54=, $pop53, $5
-; NO-SIMD128-NEXT: i32.xor $push55=, $pop54, $37
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop55
-; NO-SIMD128-NEXT: i32.const $push59=, 3
-; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59
-; NO-SIMD128-NEXT: i32.xor $push56=, $20, $36
-; NO-SIMD128-NEXT: i32.and $push57=, $pop56, $4
-; NO-SIMD128-NEXT: i32.xor $push58=, $pop57, $36
-; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58
-; NO-SIMD128-NEXT: i32.xor $push61=, $19, $35
-; NO-SIMD128-NEXT: i32.and $push62=, $pop61, $3
-; NO-SIMD128-NEXT: i32.xor $push63=, $pop62, $35
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop63
-; NO-SIMD128-NEXT: i32.xor $push64=, $18, $34
-; NO-SIMD128-NEXT: i32.and $push65=, $pop64, $2
-; NO-SIMD128-NEXT: i32.xor $push66=, $pop65, $34
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop66
-; NO-SIMD128-NEXT: i32.xor $push67=, $17, $33
-; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $1
-; NO-SIMD128-NEXT: i32.xor $push69=, $pop68, $33
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop69
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $31, $47
+; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $15
+; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $47
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop5
+; NO-SIMD128-NEXT: i32.xor $push6=, $30, $46
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $14
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $46
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push9=, $29, $45
+; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $13
+; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $45
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop11
+; NO-SIMD128-NEXT: i32.xor $push12=, $28, $44
+; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $12
+; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $44
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop14
+; NO-SIMD128-NEXT: i32.xor $push15=, $27, $43
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $11
+; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $43
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop17
+; NO-SIMD128-NEXT: i32.xor $push18=, $26, $42
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $10
+; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $42
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop20
+; NO-SIMD128-NEXT: i32.xor $push21=, $25, $41
+; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $9
+; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $41
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop23
+; NO-SIMD128-NEXT: i32.xor $push24=, $24, $40
+; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $8
+; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $40
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop26
+; NO-SIMD128-NEXT: i32.xor $push27=, $23, $39
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $7
+; NO-SIMD128-NEXT: i32.xor $push29=, $pop28, $39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop29
+; NO-SIMD128-NEXT: i32.xor $push30=, $22, $38
+; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $6
+; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $38
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop32
+; NO-SIMD128-NEXT: i32.xor $push33=, $21, $37
+; NO-SIMD128-NEXT: i32.and $push34=, $pop33, $5
+; NO-SIMD128-NEXT: i32.xor $push35=, $pop34, $37
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop35
+; NO-SIMD128-NEXT: i32.xor $push36=, $20, $36
+; NO-SIMD128-NEXT: i32.and $push37=, $pop36, $4
+; NO-SIMD128-NEXT: i32.xor $push38=, $pop37, $36
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop38
+; NO-SIMD128-NEXT: i32.xor $push39=, $19, $35
+; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $3
+; NO-SIMD128-NEXT: i32.xor $push41=, $pop40, $35
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop41
+; NO-SIMD128-NEXT: i32.xor $push42=, $18, $34
+; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $2
+; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $34
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop44
+; NO-SIMD128-NEXT: i32.xor $push45=, $17, $33
+; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $1
+; NO-SIMD128-NEXT: i32.xor $push47=, $pop46, $33
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop47
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_v16i8:
@@ -5849,80 +4771,58 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $20, $36
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $21, $37
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $37
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $22, $38
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6
-; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $38
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $23, $39
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $39
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $24, $40
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8
-; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $40
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop31
-; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $25, $41
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $9
-; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $pop33, $41
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $26, $42
-; NO-SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $10
-; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $pop38, $42
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40
-; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43
-; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $11
-; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45
-; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $28, $44
-; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $12
-; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $pop48, $44
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop49
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50
-; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $29, $45
-; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $13
-; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $45
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55
-; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $30, $46
-; NO-SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $14
-; NO-SIMD128-FAST-NEXT: i32.xor $push59=, $pop58, $46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $31, $47
-; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $15
-; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop64
-; NO-SIMD128-FAST-NEXT: i32.const $push65=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65
-; NO-SIMD128-FAST-NEXT: i32.xor $push67=, $32, $48
-; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $16
-; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $pop68, $48
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop69
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $36
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $36
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $21, $37
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $37
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $38
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $38
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $23, $39
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $39
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $24, $40
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $40
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $25, $41
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $9
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $41
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26
+; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $26, $42
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $10
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $pop28, $42
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $27, $43
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $11
+; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $43
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop32
+; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $28, $44
+; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $12
+; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $pop34, $44
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop35
+; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $29, $45
+; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $13
+; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $45
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop38
+; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $30, $46
+; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $14
+; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $pop40, $46
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop41
+; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $31, $47
+; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $15
+; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $47
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop44
+; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $32, $48
+; NO-SIMD128-FAST-NEXT: i32.and $push46=, $pop45, $16
+; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $pop46, $48
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop47
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <16 x i8> %v1, %v2
%and = and <16 x i8> %xor1, %c
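(bitselect_xor_v16i8 expresses the same select through xors — r = ((v1 ^ v2) & c) ^ v2, which yields v1 where a mask bit is set and v2 where it is clear — so each lane needs only three scalar ops plus the store. The diff again replaces every i32.const/i32.add address computation with a folded store offset. A minimal sketch under the same naming assumptions, completing the %xor1/%and tail visible above:)

; Minimal sketch: xor-form bitselect, three ALU ops per byte lane.
define <16 x i8> @bitselect_xor_sketch(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) {
  %xor1 = xor <16 x i8> %v1, %v2
  %and = and <16 x i8> %xor1, %c
  %r = xor <16 x i8> %and, %v2
  ret <16 x i8> %r
}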
@@ -5949,124 +4849,102 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16
; NO-SIMD128-LABEL: bitselect_xor_reversed_v16i8:
; NO-SIMD128: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 15
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.xor $push2=, $32, $48
; NO-SIMD128-NEXT: i32.const $push0=, -1
; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $48
-; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.xor $push8=, $31, $47
-; NO-SIMD128-NEXT: i32.const $push101=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop101
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $47
-; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 13
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.xor $push14=, $30, $46
-; NO-SIMD128-NEXT: i32.const $push100=, -1
-; NO-SIMD128-NEXT: i32.xor $push13=, $14, $pop100
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push6=, $31, $47
+; NO-SIMD128-NEXT: i32.const $push79=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop79
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $47
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46
+; NO-SIMD128-NEXT: i32.const $push78=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $14, $pop78
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
+; NO-SIMD128-NEXT: i32.xor $push14=, $29, $45
+; NO-SIMD128-NEXT: i32.const $push77=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $13, $pop77
; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $46
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push23=, 12
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.xor $push20=, $29, $45
-; NO-SIMD128-NEXT: i32.const $push99=, -1
-; NO-SIMD128-NEXT: i32.xor $push19=, $13, $pop99
-; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $45
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.xor $push26=, $28, $44
-; NO-SIMD128-NEXT: i32.const $push98=, -1
-; NO-SIMD128-NEXT: i32.xor $push25=, $12, $pop98
+; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $45
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
+; NO-SIMD128-NEXT: i32.xor $push18=, $28, $44
+; NO-SIMD128-NEXT: i32.const $push76=, -1
+; NO-SIMD128-NEXT: i32.xor $push17=, $12, $pop76
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $44
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
+; NO-SIMD128-NEXT: i32.xor $push22=, $27, $43
+; NO-SIMD128-NEXT: i32.const $push75=, -1
+; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop75
+; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $43
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
+; NO-SIMD128-NEXT: i32.xor $push26=, $26, $42
+; NO-SIMD128-NEXT: i32.const $push74=, -1
+; NO-SIMD128-NEXT: i32.xor $push25=, $10, $pop74
; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25
-; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $44
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push35=, 10
-; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-NEXT: i32.xor $push32=, $27, $43
-; NO-SIMD128-NEXT: i32.const $push97=, -1
-; NO-SIMD128-NEXT: i32.xor $push31=, $11, $pop97
-; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.xor $push34=, $pop33, $43
-; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34
-; NO-SIMD128-NEXT: i32.const $push41=, 9
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.xor $push38=, $26, $42
-; NO-SIMD128-NEXT: i32.const $push96=, -1
-; NO-SIMD128-NEXT: i32.xor $push37=, $10, $pop96
+; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $42
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
+; NO-SIMD128-NEXT: i32.xor $push30=, $25, $41
+; NO-SIMD128-NEXT: i32.const $push73=, -1
+; NO-SIMD128-NEXT: i32.xor $push29=, $9, $pop73
+; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $41
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
+; NO-SIMD128-NEXT: i32.xor $push34=, $24, $40
+; NO-SIMD128-NEXT: i32.const $push72=, -1
+; NO-SIMD128-NEXT: i32.xor $push33=, $8, $pop72
+; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33
+; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $40
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
+; NO-SIMD128-NEXT: i32.xor $push38=, $23, $39
+; NO-SIMD128-NEXT: i32.const $push71=, -1
+; NO-SIMD128-NEXT: i32.xor $push37=, $7, $pop71
; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $42
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.xor $push44=, $25, $41
-; NO-SIMD128-NEXT: i32.const $push95=, -1
-; NO-SIMD128-NEXT: i32.xor $push43=, $9, $pop95
-; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $pop43
-; NO-SIMD128-NEXT: i32.xor $push46=, $pop45, $41
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop46
-; NO-SIMD128-NEXT: i32.const $push51=, 7
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.xor $push48=, $24, $40
-; NO-SIMD128-NEXT: i32.const $push94=, -1
-; NO-SIMD128-NEXT: i32.xor $push47=, $8, $pop94
-; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $pop47
-; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $40
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.const $push57=, 6
-; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57
-; NO-SIMD128-NEXT: i32.xor $push54=, $23, $39
-; NO-SIMD128-NEXT: i32.const $push93=, -1
-; NO-SIMD128-NEXT: i32.xor $push53=, $7, $pop93
+; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
+; NO-SIMD128-NEXT: i32.xor $push42=, $22, $38
+; NO-SIMD128-NEXT: i32.const $push70=, -1
+; NO-SIMD128-NEXT: i32.xor $push41=, $6, $pop70
+; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $pop41
+; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $38
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
+; NO-SIMD128-NEXT: i32.xor $push46=, $21, $37
+; NO-SIMD128-NEXT: i32.const $push69=, -1
+; NO-SIMD128-NEXT: i32.xor $push45=, $5, $pop69
+; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $pop45
+; NO-SIMD128-NEXT: i32.xor $push48=, $pop47, $37
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
+; NO-SIMD128-NEXT: i32.xor $push50=, $20, $36
+; NO-SIMD128-NEXT: i32.const $push68=, -1
+; NO-SIMD128-NEXT: i32.xor $push49=, $4, $pop68
+; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $pop49
+; NO-SIMD128-NEXT: i32.xor $push52=, $pop51, $36
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
+; NO-SIMD128-NEXT: i32.xor $push54=, $19, $35
+; NO-SIMD128-NEXT: i32.const $push67=, -1
+; NO-SIMD128-NEXT: i32.xor $push53=, $3, $pop67
; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $pop53
-; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $39
-; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56
-; NO-SIMD128-NEXT: i32.const $push63=, 5
-; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-NEXT: i32.xor $push60=, $22, $38
-; NO-SIMD128-NEXT: i32.const $push92=, -1
-; NO-SIMD128-NEXT: i32.xor $push59=, $6, $pop92
-; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $pop59
-; NO-SIMD128-NEXT: i32.xor $push62=, $pop61, $38
-; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-NEXT: i32.xor $push66=, $21, $37
-; NO-SIMD128-NEXT: i32.const $push91=, -1
-; NO-SIMD128-NEXT: i32.xor $push65=, $5, $pop91
-; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $pop65
-; NO-SIMD128-NEXT: i32.xor $push68=, $pop67, $37
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop68
-; NO-SIMD128-NEXT: i32.const $push73=, 3
-; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-NEXT: i32.xor $push70=, $20, $36
-; NO-SIMD128-NEXT: i32.const $push90=, -1
-; NO-SIMD128-NEXT: i32.xor $push69=, $4, $pop90
-; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $pop69
-; NO-SIMD128-NEXT: i32.xor $push72=, $pop71, $36
-; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-NEXT: i32.xor $push76=, $19, $35
-; NO-SIMD128-NEXT: i32.const $push89=, -1
-; NO-SIMD128-NEXT: i32.xor $push75=, $3, $pop89
-; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $pop75
-; NO-SIMD128-NEXT: i32.xor $push78=, $pop77, $35
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop78
-; NO-SIMD128-NEXT: i32.xor $push80=, $18, $34
-; NO-SIMD128-NEXT: i32.const $push88=, -1
-; NO-SIMD128-NEXT: i32.xor $push79=, $2, $pop88
-; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $pop79
-; NO-SIMD128-NEXT: i32.xor $push82=, $pop81, $34
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop82
-; NO-SIMD128-NEXT: i32.xor $push84=, $17, $33
-; NO-SIMD128-NEXT: i32.const $push87=, -1
-; NO-SIMD128-NEXT: i32.xor $push83=, $1, $pop87
-; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $pop83
-; NO-SIMD128-NEXT: i32.xor $push86=, $pop85, $33
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop86
+; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $35
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
+; NO-SIMD128-NEXT: i32.xor $push58=, $18, $34
+; NO-SIMD128-NEXT: i32.const $push66=, -1
+; NO-SIMD128-NEXT: i32.xor $push57=, $2, $pop66
+; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $pop57
+; NO-SIMD128-NEXT: i32.xor $push60=, $pop59, $34
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
+; NO-SIMD128-NEXT: i32.xor $push62=, $17, $33
+; NO-SIMD128-NEXT: i32.const $push65=, -1
+; NO-SIMD128-NEXT: i32.xor $push61=, $1, $pop65
+; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $pop61
+; NO-SIMD128-NEXT: i32.xor $push64=, $pop63, $33
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8:
@@ -6079,117 +4957,95 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16
; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $33
; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $18, $34
-; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop101
+; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop79
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $34
; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $19, $35
-; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop100
+; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop78
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $35
; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $20, $36
-; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop99
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop77
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $21, $37
-; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop98
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $37
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $22, $38
-; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop97
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $38
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $23, $39
-; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop96
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $21, $37
+; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop76
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $37
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $22, $38
+; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop75
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $38
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $23, $39
+; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop74
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $39
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $24, $40
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop73
; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $39
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $24, $40
-; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop95
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $40
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38
-; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $25, $41
-; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $9, $pop94
+; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $40
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32
+; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $25, $41
+; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop72
+; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $pop33
+; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $pop35, $41
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36
+; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $26, $42
+; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $10, $pop71
+; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $pop37
+; NO-SIMD128-FAST-NEXT: i32.xor $push40=, $pop39, $42
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40
+; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43
+; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop70
; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop41
-; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $41
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49
-; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $26, $42
-; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $10, $pop93
+; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44
+; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $28, $44
+; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $12, $pop69
; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $pop45
-; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $42
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55
-; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $27, $43
-; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $11, $pop92
-; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop51
-; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $43
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61
-; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $28, $44
-; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $12, $pop91
+; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $44
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48
+; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $29, $45
+; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $13, $pop68
+; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $pop49
+; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $pop51, $45
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52
+; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $30, $46
+; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $14, $pop67
+; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $pop53
+; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $pop55, $46
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56
+; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $31, $47
+; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $15, $pop66
; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $pop57
-; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $44
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60
-; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67
-; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $29, $45
-; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push63=, $13, $pop90
-; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $pop63
-; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $pop65, $45
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66
-; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73
-; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $30, $46
-; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $14, $pop89
-; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $pop69
-; NO-SIMD128-FAST-NEXT: i32.xor $push72=, $pop71, $46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72
-; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79
-; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $31, $47
-; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push75=, $15, $pop88
-; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $pop75
-; NO-SIMD128-FAST-NEXT: i32.xor $push78=, $pop77, $47
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78
-; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85
-; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $32, $48
-; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push81=, $16, $pop87
-; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $pop81
-; NO-SIMD128-FAST-NEXT: i32.xor $push84=, $pop83, $48
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84
+; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $47
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60
+; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $32, $48
+; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $16, $pop65
+; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop61
+; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $48
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <16 x i8> %v1, %v2
%notc = xor <16 x i8> %c, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
@@ -6218,30 +5074,22 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: add_v8i16:
; NO-SIMD128: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.add $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.add $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.add $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.add $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.add $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.add $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.add $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.add $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.add $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.add $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.add $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.add $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.add $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.add $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.add $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: add_v8i16:
@@ -6253,24 +5101,16 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = add <8 x i16> %x, %y
ret <8 x i16> %a
@@ -6292,30 +5132,22 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: sub_v8i16:
; NO-SIMD128: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.sub $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.sub $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.sub $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.sub $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.sub $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.sub $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.sub $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.sub $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.sub $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.sub $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.sub $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.sub $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.sub $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.sub $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.sub $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.sub $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: sub_v8i16:
@@ -6327,24 +5159,16 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = sub <8 x i16> %x, %y
ret <8 x i16> %a
@@ -6366,30 +5190,22 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: mul_v8i16:
; NO-SIMD128: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.mul $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.mul $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.mul $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.mul $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.mul $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.mul $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.mul $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.mul $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.mul $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.mul $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.mul $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.mul $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.mul $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.mul $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.mul $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.mul $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: mul_v8i16:
@@ -6401,24 +5217,16 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = mul <8 x i16> %x, %y
ret <8 x i16> %a
@@ -6440,54 +5248,46 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: min_s_v8i16:
; NO-SIMD128: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 14
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8
; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16
; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push10=, 12
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15
-; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8
-; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push16=, 10
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6
-; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15
+; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6
+; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14
+; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop11
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13
; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5
-; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13
-; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop21
-; NO-SIMD128-NEXT: i32.const $push26=, 6
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4
-; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12
-; NO-SIMD128-NEXT: i32.lt_s $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3
-; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11
+; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4
+; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12
+; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop19
+; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3
+; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11
+; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop23
+; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2
+; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10
+; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24
+; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop27
+; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1
+; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9
; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop31
-; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2
-; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10
-; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32
-; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop35
-; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1
-; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9
-; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop39
+; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop31
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_s_v8i16:
@@ -6508,39 +5308,31 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12
; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22
-; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24
+; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16
; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34
-; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37
+; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31
; NO-SIMD128-FAST-NEXT: return
%c = icmp slt <8 x i16> %x, %y
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
@@ -6563,70 +5355,62 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: min_u_v8i16:
; NO-SIMD128: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.const $push0=, 65535
; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
-; NO-SIMD128-NEXT: i32.const $push55=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55
+; NO-SIMD128-NEXT: i32.const $push47=, 65535
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47
; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 12
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push54=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54
-; NO-SIMD128-NEXT: i32.const $push53=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53
-; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 10
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push52=, 65535
-; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52
-; NO-SIMD128-NEXT: i32.const $push51=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51
-; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15
-; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push50=, 65535
-; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50
-; NO-SIMD128-NEXT: i32.const $push49=, 65535
-; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49
-; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push27=, 6
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.const $push48=, 65535
-; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48
-; NO-SIMD128-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47
-; NO-SIMD128-NEXT: i32.lt_u $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25
-; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
; NO-SIMD128-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46
+; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46
; NO-SIMD128-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45
-; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29
-; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop32
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45
+; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
; NO-SIMD128-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44
+; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44
; NO-SIMD128-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43
-; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33
-; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
+; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43
+; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
; NO-SIMD128-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42
+; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42
; NO-SIMD128-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41
-; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40
+; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41
+; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push40=, 65535
+; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40
+; NO-SIMD128-NEXT: i32.const $push39=, 65535
+; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39
+; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push38=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38
+; NO-SIMD128-NEXT: i32.const $push37=, 65535
+; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37
+; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push36=, 65535
+; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36
+; NO-SIMD128-NEXT: i32.const $push35=, 65535
+; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35
+; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push34=, 65535
+; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34
+; NO-SIMD128-NEXT: i32.const $push33=, 65535
+; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33
+; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_u_v8i16:
@@ -6634,68 +5418,60 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47
; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45
; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43
; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41
; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33
; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38
+; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%c = icmp ult <8 x i16> %x, %y
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
@@ -6718,54 +5494,46 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: max_s_v8i16:
; NO-SIMD128: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 14
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8
; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16
; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push10=, 12
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15
-; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8
-; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push16=, 10
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6
-; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15
+; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6
+; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14
+; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop11
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13
; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5
-; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13
-; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop21
-; NO-SIMD128-NEXT: i32.const $push26=, 6
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4
-; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12
-; NO-SIMD128-NEXT: i32.gt_s $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3
-; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11
+; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4
+; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12
+; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop19
+; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3
+; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11
+; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20
+; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop23
+; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2
+; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10
+; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24
+; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop27
+; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1
+; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9
; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop31
-; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2
-; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10
-; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32
-; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop35
-; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1
-; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9
-; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop39
+; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop31
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_s_v8i16:
@@ -6786,39 +5554,31 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12
; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22
-; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20
+; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15
+; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24
+; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16
; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16
-; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34
-; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37
+; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31
; NO-SIMD128-FAST-NEXT: return
%c = icmp sgt <8 x i16> %x, %y
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
@@ -6841,70 +5601,62 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: max_u_v8i16:
; NO-SIMD128: .functype max_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.const $push0=, 65535
; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
-; NO-SIMD128-NEXT: i32.const $push55=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55
+; NO-SIMD128-NEXT: i32.const $push47=, 65535
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47
; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 12
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push54=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54
-; NO-SIMD128-NEXT: i32.const $push53=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53
-; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 10
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push52=, 65535
-; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52
-; NO-SIMD128-NEXT: i32.const $push51=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51
-; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15
-; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push50=, 65535
-; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50
-; NO-SIMD128-NEXT: i32.const $push49=, 65535
-; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49
-; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push27=, 6
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.const $push48=, 65535
-; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48
-; NO-SIMD128-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47
-; NO-SIMD128-NEXT: i32.gt_u $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25
-; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
; NO-SIMD128-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46
+; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46
; NO-SIMD128-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45
-; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29
-; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop32
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45
+; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
; NO-SIMD128-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44
+; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44
; NO-SIMD128-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43
-; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33
-; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
+; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43
+; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
; NO-SIMD128-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42
+; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42
; NO-SIMD128-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41
-; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40
+; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41
+; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
+; NO-SIMD128-NEXT: i32.const $push40=, 65535
+; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40
+; NO-SIMD128-NEXT: i32.const $push39=, 65535
+; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39
+; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push38=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38
+; NO-SIMD128-NEXT: i32.const $push37=, 65535
+; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37
+; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push36=, 65535
+; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36
+; NO-SIMD128-NEXT: i32.const $push35=, 65535
+; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35
+; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push34=, 65535
+; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34
+; NO-SIMD128-NEXT: i32.const $push33=, 65535
+; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33
+; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_u_v8i16:
@@ -6912,68 +5664,60 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47
; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45
; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52
-; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43
; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41
; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35
+; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33
; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41
-; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38
+; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%c = icmp ugt <8 x i16> %x, %y
%a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
@@ -6996,78 +5740,70 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: avgr_u_v8i16:
; NO-SIMD128: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.add $push2=, $8, $16
-; NO-SIMD128-NEXT: i32.const $push3=, 1
-; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 65534
-; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
-; NO-SIMD128-NEXT: i32.const $push63=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.add $push10=, $7, $15
-; NO-SIMD128-NEXT: i32.const $push62=, 1
-; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62
-; NO-SIMD128-NEXT: i32.const $push61=, 65534
-; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61
-; NO-SIMD128-NEXT: i32.const $push60=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13
-; NO-SIMD128-NEXT: i32.const $push14=, 10
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.add $push16=, $6, $14
-; NO-SIMD128-NEXT: i32.const $push59=, 1
-; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59
-; NO-SIMD128-NEXT: i32.const $push58=, 65534
-; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58
-; NO-SIMD128-NEXT: i32.const $push57=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-NEXT: i32.const $push56=, 1
-; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56
-; NO-SIMD128-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55
+; NO-SIMD128-NEXT: i32.add $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.const $push1=, 1
+; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1
+; NO-SIMD128-NEXT: i32.const $push3=, 65534
+; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; NO-SIMD128-NEXT: i32.const $push55=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $7, $15
; NO-SIMD128-NEXT: i32.const $push54=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-NEXT: i32.const $push24=, 6
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.add $push26=, $4, $12
-; NO-SIMD128-NEXT: i32.const $push53=, 1
-; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53
-; NO-SIMD128-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52
+; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-NEXT: i32.const $push52=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $14
; NO-SIMD128-NEXT: i32.const $push51=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51
-; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-NEXT: i32.add $push30=, $3, $11
-; NO-SIMD128-NEXT: i32.const $push50=, 1
-; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50
-; NO-SIMD128-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49
+; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-NEXT: i32.const $push49=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $5, $13
; NO-SIMD128-NEXT: i32.const $push48=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop33
-; NO-SIMD128-NEXT: i32.add $push34=, $2, $10
-; NO-SIMD128-NEXT: i32.const $push47=, 1
-; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47
-; NO-SIMD128-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-NEXT: i32.const $push46=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop17
+; NO-SIMD128-NEXT: i32.add $push18=, $4, $12
; NO-SIMD128-NEXT: i32.const $push45=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop37
-; NO-SIMD128-NEXT: i32.add $push38=, $1, $9
-; NO-SIMD128-NEXT: i32.const $push44=, 1
-; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-NEXT: i32.const $push43=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop21
+; NO-SIMD128-NEXT: i32.add $push22=, $3, $11
; NO-SIMD128-NEXT: i32.const $push42=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop41
+; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-NEXT: i32.const $push40=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop25
+; NO-SIMD128-NEXT: i32.add $push26=, $2, $10
+; NO-SIMD128-NEXT: i32.const $push39=, 1
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-NEXT: i32.const $push37=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop29
+; NO-SIMD128-NEXT: i32.add $push30=, $1, $9
+; NO-SIMD128-NEXT: i32.const $push36=, 1
+; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-NEXT: i32.const $push34=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop33
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: avgr_u_v8i16:
@@ -7078,73 +5814,65 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1
; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5
; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52
; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12
; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13
; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14
; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41
+; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25
+; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33
; NO-SIMD128-FAST-NEXT: return
%a = add nuw <8 x i16> %x, %y
%b = add nuw <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -7176,78 +5904,70 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: avgr_u_v8i16_wrap:
; NO-SIMD128: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.add $push2=, $8, $16
-; NO-SIMD128-NEXT: i32.const $push3=, 1
-; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 65534
-; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5
-; NO-SIMD128-NEXT: i32.const $push63=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.add $push10=, $7, $15
-; NO-SIMD128-NEXT: i32.const $push62=, 1
-; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62
-; NO-SIMD128-NEXT: i32.const $push61=, 65534
-; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61
-; NO-SIMD128-NEXT: i32.const $push60=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13
-; NO-SIMD128-NEXT: i32.const $push14=, 10
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.add $push16=, $6, $14
-; NO-SIMD128-NEXT: i32.const $push59=, 1
-; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59
-; NO-SIMD128-NEXT: i32.const $push58=, 65534
-; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58
-; NO-SIMD128-NEXT: i32.const $push57=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-NEXT: i32.const $push56=, 1
-; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56
-; NO-SIMD128-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55
+; NO-SIMD128-NEXT: i32.add $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.const $push1=, 1
+; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1
+; NO-SIMD128-NEXT: i32.const $push3=, 65534
+; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; NO-SIMD128-NEXT: i32.const $push55=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $7, $15
; NO-SIMD128-NEXT: i32.const $push54=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-NEXT: i32.const $push24=, 6
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.add $push26=, $4, $12
-; NO-SIMD128-NEXT: i32.const $push53=, 1
-; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53
-; NO-SIMD128-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52
+; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-NEXT: i32.const $push52=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $14
; NO-SIMD128-NEXT: i32.const $push51=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51
-; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-NEXT: i32.add $push30=, $3, $11
-; NO-SIMD128-NEXT: i32.const $push50=, 1
-; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50
-; NO-SIMD128-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49
+; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-NEXT: i32.const $push49=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $5, $13
; NO-SIMD128-NEXT: i32.const $push48=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop33
-; NO-SIMD128-NEXT: i32.add $push34=, $2, $10
-; NO-SIMD128-NEXT: i32.const $push47=, 1
-; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47
-; NO-SIMD128-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-NEXT: i32.const $push46=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop17
+; NO-SIMD128-NEXT: i32.add $push18=, $4, $12
; NO-SIMD128-NEXT: i32.const $push45=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop37
-; NO-SIMD128-NEXT: i32.add $push38=, $1, $9
-; NO-SIMD128-NEXT: i32.const $push44=, 1
-; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-NEXT: i32.const $push43=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop21
+; NO-SIMD128-NEXT: i32.add $push22=, $3, $11
; NO-SIMD128-NEXT: i32.const $push42=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop41
+; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-NEXT: i32.const $push40=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop25
+; NO-SIMD128-NEXT: i32.add $push26=, $2, $10
+; NO-SIMD128-NEXT: i32.const $push39=, 1
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-NEXT: i32.const $push37=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop29
+; NO-SIMD128-NEXT: i32.add $push30=, $1, $9
+; NO-SIMD128-NEXT: i32.const $push36=, 1
+; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-NEXT: i32.const $push34=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop33
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_wrap:
@@ -7258,73 +5978,65 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1
; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5
; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52
; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12
; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13
; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14
; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41
+; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25
+; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33
; NO-SIMD128-FAST-NEXT: return
%a = add <8 x i16> %x, %y
%b = add <8 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -7348,70 +6060,62 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) {
; NO-SIMD128-LABEL: abs_v8i16:
; NO-SIMD128: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 14
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: i32.extend16_s $push0=, $8
; NO-SIMD128-NEXT: i32.const $push1=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push55=, $pop0, $pop1
-; NO-SIMD128-NEXT: local.tee $push54=, $9=, $pop55
-; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop54
+; NO-SIMD128-NEXT: i32.shr_s $push47=, $pop0, $pop1
+; NO-SIMD128-NEXT: local.tee $push46=, $9=, $pop47
+; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop46
; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $7
-; NO-SIMD128-NEXT: i32.const $push53=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop6, $pop53
-; NO-SIMD128-NEXT: local.tee $push51=, $8=, $pop52
-; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop51
-; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push14=, 10
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.extend16_s $push11=, $6
-; NO-SIMD128-NEXT: i32.const $push50=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push49=, $pop11, $pop50
-; NO-SIMD128-NEXT: local.tee $push48=, $8=, $pop49
-; NO-SIMD128-NEXT: i32.xor $push12=, $6, $pop48
-; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
-; NO-SIMD128-NEXT: i32.extend16_s $push16=, $5
-; NO-SIMD128-NEXT: i32.const $push47=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push46=, $pop16, $pop47
-; NO-SIMD128-NEXT: local.tee $push45=, $8=, $pop46
-; NO-SIMD128-NEXT: i32.xor $push17=, $5, $pop45
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7
+; NO-SIMD128-NEXT: i32.const $push45=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop4, $pop45
+; NO-SIMD128-NEXT: local.tee $push43=, $8=, $pop44
+; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop43
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $8
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6
+; NO-SIMD128-NEXT: i32.const $push42=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push41=, $pop7, $pop42
+; NO-SIMD128-NEXT: local.tee $push40=, $8=, $pop41
+; NO-SIMD128-NEXT: i32.xor $push8=, $6, $pop40
+; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $8
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
+; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5
+; NO-SIMD128-NEXT: i32.const $push39=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop10, $pop39
+; NO-SIMD128-NEXT: local.tee $push37=, $8=, $pop38
+; NO-SIMD128-NEXT: i32.xor $push11=, $5, $pop37
+; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $8
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $4
+; NO-SIMD128-NEXT: i32.const $push36=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push35=, $pop13, $pop36
+; NO-SIMD128-NEXT: local.tee $push34=, $8=, $pop35
+; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop34
+; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $8
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push16=, $3
+; NO-SIMD128-NEXT: i32.const $push33=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop16, $pop33
+; NO-SIMD128-NEXT: local.tee $push31=, $8=, $pop32
+; NO-SIMD128-NEXT: i32.xor $push17=, $3, $pop31
; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $8
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 6
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $4
-; NO-SIMD128-NEXT: i32.const $push44=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop19, $pop44
-; NO-SIMD128-NEXT: local.tee $push42=, $8=, $pop43
-; NO-SIMD128-NEXT: i32.xor $push20=, $4, $pop42
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.extend16_s $push19=, $2
+; NO-SIMD128-NEXT: i32.const $push30=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push29=, $pop19, $pop30
+; NO-SIMD128-NEXT: local.tee $push28=, $8=, $pop29
+; NO-SIMD128-NEXT: i32.xor $push20=, $2, $pop28
; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.extend16_s $push24=, $3
-; NO-SIMD128-NEXT: i32.const $push41=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop24, $pop41
-; NO-SIMD128-NEXT: local.tee $push39=, $8=, $pop40
-; NO-SIMD128-NEXT: i32.xor $push25=, $3, $pop39
-; NO-SIMD128-NEXT: i32.sub $push26=, $pop25, $8
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push27=, $2
-; NO-SIMD128-NEXT: i32.const $push38=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push37=, $pop27, $pop38
-; NO-SIMD128-NEXT: local.tee $push36=, $8=, $pop37
-; NO-SIMD128-NEXT: i32.xor $push28=, $2, $pop36
-; NO-SIMD128-NEXT: i32.sub $push29=, $pop28, $8
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop29
-; NO-SIMD128-NEXT: i32.extend16_s $push30=, $1
-; NO-SIMD128-NEXT: i32.const $push35=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push34=, $pop30, $pop35
-; NO-SIMD128-NEXT: local.tee $push33=, $8=, $pop34
-; NO-SIMD128-NEXT: i32.xor $push31=, $1, $pop33
-; NO-SIMD128-NEXT: i32.sub $push32=, $pop31, $8
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.extend16_s $push22=, $1
+; NO-SIMD128-NEXT: i32.const $push27=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop22, $pop27
+; NO-SIMD128-NEXT: local.tee $push25=, $8=, $pop26
+; NO-SIMD128-NEXT: i32.xor $push23=, $1, $pop25
+; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $8
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: abs_v8i16:
@@ -7419,68 +6123,60 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $1
; NO-SIMD128-FAST-NEXT: i32.const $push1=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop0, $pop1
-; NO-SIMD128-FAST-NEXT: local.tee $push54=, $9=, $pop55
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop54
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push47=, $pop0, $pop1
+; NO-SIMD128-FAST-NEXT: local.tee $push46=, $9=, $pop47
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop46
; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $9
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push52=, $pop4, $pop53
-; NO-SIMD128-FAST-NEXT: local.tee $push51=, $1=, $pop52
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop4, $pop45
+; NO-SIMD128-FAST-NEXT: local.tee $push43=, $1=, $pop44
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop43
; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop7, $pop50
-; NO-SIMD128-FAST-NEXT: local.tee $push48=, $2=, $pop49
-; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push41=, $pop7, $pop42
+; NO-SIMD128-FAST-NEXT: local.tee $push40=, $2=, $pop41
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop40
; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop10, $pop47
-; NO-SIMD128-FAST-NEXT: local.tee $push45=, $3=, $pop46
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop10, $pop39
+; NO-SIMD128-FAST-NEXT: local.tee $push37=, $3=, $pop38
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop37
; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop15, $pop44
-; NO-SIMD128-FAST-NEXT: local.tee $push42=, $4=, $pop43
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop42
-; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop18, $pop41
-; NO-SIMD128-FAST-NEXT: local.tee $push39=, $5=, $pop40
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop39
-; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push37=, $pop23, $pop38
-; NO-SIMD128-FAST-NEXT: local.tee $push36=, $6=, $pop37
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop36
-; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop28, $pop35
-; NO-SIMD128-FAST-NEXT: local.tee $push33=, $0=, $pop34
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33
-; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $0
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop13, $pop36
+; NO-SIMD128-FAST-NEXT: local.tee $push34=, $4=, $pop35
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop34
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop16, $pop33
+; NO-SIMD128-FAST-NEXT: local.tee $push31=, $5=, $pop32
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop31
+; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push29=, $pop19, $pop30
+; NO-SIMD128-FAST-NEXT: local.tee $push28=, $6=, $pop29
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop28
+; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop22, $pop27
+; NO-SIMD128-FAST-NEXT: local.tee $push25=, $7=, $pop26
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop25
+; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
; NO-SIMD128-FAST-NEXT: return
%a = sub <8 x i16> zeroinitializer, %x
%b = icmp slt <8 x i16> %x, zeroinitializer
@@ -7505,37 +6201,29 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
; NO-SIMD128: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 0
-; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $5
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push23=, 0
-; NO-SIMD128-NEXT: i32.sub $push2=, $pop23, $3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push22=, 0
-; NO-SIMD128-NEXT: i32.sub $push3=, $pop22, $2
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push21=, 0
-; NO-SIMD128-NEXT: i32.sub $push4=, $pop21, $1
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.const $push20=, 0
-; NO-SIMD128-NEXT: i32.sub $push5=, $pop20, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.const $push19=, 0
-; NO-SIMD128-NEXT: i32.sub $push8=, $pop19, $7
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, 0
-; NO-SIMD128-NEXT: i32.sub $push11=, $pop18, $6
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push17=, 0
-; NO-SIMD128-NEXT: i32.sub $push14=, $pop17, $4
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $8
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push15=, 0
+; NO-SIMD128-NEXT: i32.sub $push2=, $pop15, $7
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push14=, 0
+; NO-SIMD128-NEXT: i32.sub $push3=, $pop14, $6
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push13=, 0
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop13, $5
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push12=, 0
+; NO-SIMD128-NEXT: i32.sub $push5=, $pop12, $4
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push11=, 0
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop11, $3
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push10=, 0
+; NO-SIMD128-NEXT: i32.sub $push7=, $pop10, $2
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push9=, 0
+; NO-SIMD128-NEXT: i32.sub $push8=, $pop9, $1
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: neg_v8i16:
@@ -7544,35 +6232,27 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0
; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2
+; NO-SIMD128-FAST-NEXT: i32.const $push15=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop15, $2
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop14, $3
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop13, $4
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push12=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop12, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop11, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop10, $7
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop9, $8
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
%x
@@ -7596,64 +6276,48 @@ define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0
-; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18
-; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop17
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.shl $push3=, $2, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.shl $push4=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.shl $push5=, $8, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.shl $push8=, $7, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.shl $push11=, $6, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.shl $push14=, $4, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop0
+; NO-SIMD128-NEXT: local.tee $push9=, $9=, $pop10
+; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop9
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $7, $9
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $6, $9
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.shl $push4=, $5, $9
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.shl $push5=, $4, $9
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.shl $push6=, $3, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.shl $push7=, $2, $9
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.shl $push8=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_v8i16:
; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18
-; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $9, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push9=, $9=, $pop10
+; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop9
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <8 x i16> undef, i16 %x, i32 0
%s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -7681,37 +6345,29 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
; NO-SIMD128: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 5
-; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push23=, 5
-; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop23
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push22=, 5
-; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop22
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push21=, 5
-; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.const $push20=, 5
-; NO-SIMD128-NEXT: i32.shl $push5=, $8, $pop20
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.const $push19=, 5
-; NO-SIMD128-NEXT: i32.shl $push8=, $7, $pop19
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push17=, 5
-; NO-SIMD128-NEXT: i32.shl $push14=, $4, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push15=, 5
+; NO-SIMD128-NEXT: i32.shl $push2=, $7, $pop15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push14=, 5
+; NO-SIMD128-NEXT: i32.shl $push3=, $6, $pop14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push13=, 5
+; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push12=, 5
+; NO-SIMD128-NEXT: i32.shl $push5=, $4, $pop12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push11=, 5
+; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push10=, 5
+; NO-SIMD128-NEXT: i32.shl $push7=, $2, $pop10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push9=, 5
+; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_const_v8i16:
@@ -7720,35 +6376,27 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5
; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23
+; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop15
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop14
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push12=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%a = shl <8 x i16> %v,
<i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -7866,45 +6514,37 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0
-; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-NEXT: i32.and $push3=, $11, $pop31
-; NO-SIMD128-NEXT: i32.shl $push4=, $3, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $10, $pop30
-; NO-SIMD128-NEXT: i32.shl $push6=, $2, $pop5
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop29
-; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop7
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-NEXT: i32.and $push9=, $16, $pop28
-; NO-SIMD128-NEXT: i32.shl $push10=, $8, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $15, $pop27
-; NO-SIMD128-NEXT: i32.shl $push14=, $7, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-NEXT: i32.and $push17=, $14, $pop26
-; NO-SIMD128-NEXT: i32.shl $push18=, $6, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-NEXT: i32.and $push21=, $12, $pop25
-; NO-SIMD128-NEXT: i32.shl $push22=, $4, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.shl $push2=, $8, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop23
+; NO-SIMD128-NEXT: i32.shl $push4=, $7, $pop3
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop22
+; NO-SIMD128-NEXT: i32.shl $push6=, $6, $pop5
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop21
+; NO-SIMD128-NEXT: i32.shl $push8=, $5, $pop7
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop20
+; NO-SIMD128-NEXT: i32.shl $push10=, $4, $pop9
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop19
+; NO-SIMD128-NEXT: i32.shl $push12=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop18
+; NO-SIMD128-NEXT: i32.shl $push14=, $2, $pop13
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop17
+; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop15
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_vec_v8i16:
@@ -7914,42 +6554,34 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop23
; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop22
; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28
-; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25
-; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $12, $pop21
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $13, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $14, $pop19
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $15, $pop18
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $pop17
+; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%a = shl <8 x i16> %v, %x
ret <8 x i16> %a
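Without +simd128 the backend scalarizes each <8 x i16> operation: every lane lives in its own i32 parameter, is shifted with i32.shl, and is written back with i32.store16. The updated checks above fold each lane's byte offset into the store's offset immediate (for example i32.store16 6($0)) where the old output first built the address with an i32.const/i32.add pair, saving two instructions per lane. A minimal LLVM IR sketch of the folded addressing, assuming a wasm32 target without +simd128; the function name and the offset 6 are illustrative only:

; One lane store at byte offset 6 of the result pointer. The GEP
; should fold into the store's offset immediate, so no separate
; i32.const/i32.add is expected in the output.
define void @store_lane3(ptr %res, i16 %lane) {
  %addr = getelementptr inbounds i8, ptr %res, i32 6
  store i16 %lane, ptr %addr        ; expected: i32.store16 6($0), ...
  ret void
}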
@@ -7971,41 +6603,33 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-LABEL: shr_s_v8i16:
; NO-SIMD128: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push1=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push26=, $9, $pop0
-; NO-SIMD128-NEXT: local.tee $push25=, $9=, $pop26
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop25
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend16_s $push3=, $3
+; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0
+; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop17
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend16_s $push3=, $7
; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.extend16_s $push5=, $2
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $6
; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5
; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.extend16_s $push9=, $8
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $4
; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend16_s $push13=, $7
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.extend16_s $push11=, $3
+; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $2
; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.extend16_s $push17=, $6
-; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.extend16_s $push21=, $4
-; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.extend16_s $push15=, $1
+; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_v8i16:
@@ -8013,9 +6637,9 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push25=, $1=, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop25
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop17
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $2
; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1
@@ -8023,29 +6647,21 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $3
; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $4
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $5
; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $6
; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $8
; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <8 x i16> undef, i16 %x, i32 0
%s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -8164,54 +6780,46 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128-LABEL: shr_s_vec_v8i16:
; NO-SIMD128: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push2=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push2=, $8
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0
; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.extend16_s $push5=, $3
-; NO-SIMD128-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop39
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7
+; NO-SIMD128-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop31
; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.extend16_s $push8=, $2
-; NO-SIMD128-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop38
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push8=, $6
+; NO-SIMD128-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop30
; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.extend16_s $push11=, $1
-; NO-SIMD128-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop37
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
+; NO-SIMD128-NEXT: i32.extend16_s $push11=, $5
+; NO-SIMD128-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop29
; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend16_s $push14=, $8
-; NO-SIMD128-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop36
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push14=, $4
+; NO-SIMD128-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop28
; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $7
-; NO-SIMD128-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop35
-; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push24=, $6
-; NO-SIMD128-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop34
-; NO-SIMD128-NEXT: i32.shr_s $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.extend16_s $push29=, $4
-; NO-SIMD128-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33
-; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push17=, $3
+; NO-SIMD128-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop27
+; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.extend16_s $push20=, $2
+; NO-SIMD128-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop26
+; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.extend16_s $push23=, $1
+; NO-SIMD128-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25
+; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16:
@@ -8223,48 +6831,40 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop31
; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop30
; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $12, $pop37
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop14
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop36
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $14, $pop35
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push26=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop34
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push31=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $16, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop32
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $4
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop29
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push14=, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop28
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop27
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
; NO-SIMD128-FAST-NEXT: return
%a = ashr <8 x i16> %v, %x
ret <8 x i16> %a
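The arithmetic-shift checks add a step the plain shifts do not need: since each i16 lane is held in a 32-bit value, the lane is first sign-extended with i32.extend16_s and the shift amount is masked with 65535 so that only a genuine 16-bit amount is used; in the splat variants above, the masked amount is computed once and cached via local.tee $9 for reuse across lanes. A per-lane sketch in LLVM IR (hypothetical function name; the wasm instructions in the comments are the ones the checks expect):

; One lane of a scalarized <8 x i16> ashr: sign-extend the value,
; zero-mask the amount, shift in i32, then store the low 16 bits.
define void @ashr_lane(ptr %res, i16 %v, i16 %amt) {
  %v32 = sext i16 %v to i32         ; i32.extend16_s
  %a32 = zext i16 %amt to i32       ; i32.and ..., 65535
  %s   = ashr i32 %v32, %a32        ; i32.shr_s
  %r   = trunc i32 %s to i16
  store i16 %r, ptr %res            ; i32.store16
  ret void
}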
@@ -8287,48 +6887,40 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-NEXT: i32.and $push33=, $9, $pop34
-; NO-SIMD128-NEXT: local.tee $push32=, $9=, $pop33
-; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop32
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-NEXT: i32.and $push3=, $3, $pop31
+; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-NEXT: i32.and $push25=, $9, $pop26
+; NO-SIMD128-NEXT: local.tee $push24=, $9=, $pop25
+; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop24
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-NEXT: i32.and $push3=, $7, $pop23
; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop30
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop22
; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $1, $pop29
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $5, $pop21
; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-NEXT: i32.and $push9=, $8, $pop28
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-NEXT: i32.and $push9=, $4, $pop20
; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $7, $pop27
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $3, $pop19
+; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $2, $pop18
; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-NEXT: i32.and $push17=, $6, $pop26
-; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-NEXT: i32.and $push21=, $4, $pop25
-; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-NEXT: i32.and $push15=, $1, $pop17
+; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_v8i16:
@@ -8336,47 +6928,39 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34
-; NO-SIMD128-FAST-NEXT: local.tee $push32=, $1=, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop26
+; NO-SIMD128-FAST-NEXT: local.tee $push24=, $1=, $pop25
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop24
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop23
; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop22
; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop21
; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop19
; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop18
; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop17
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <8 x i16> undef, i16 %x, i32 0
%s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -8496,61 +7080,53 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop47
-; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop45
-; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop43
-; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop41
-; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 65535
-; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
; NO-SIMD128-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop39
-; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop39
+; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
; NO-SIMD128-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38
; NO-SIMD128-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop37
-; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop37
+; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
; NO-SIMD128-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36
; NO-SIMD128-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop35
-; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop35
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
; NO-SIMD128-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34
+; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34
; NO-SIMD128-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33
-; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop33
+; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push32=, 65535
+; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32
+; NO-SIMD128-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop31
+; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30
+; NO-SIMD128-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop29
+; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28
+; NO-SIMD128-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop27
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26
+; NO-SIMD128-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25
+; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16:
@@ -8558,60 +7134,52 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop39
; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop43
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop39
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38
; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop37
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36
; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop35
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34
; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop33
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push32=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop31
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop29
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop27
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
; NO-SIMD128-FAST-NEXT: return
%a = lshr <8 x i16> %v, %x
ret <8 x i16> %a
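The logical shift mirrors this with zero-extension on both operands: the high 16 bits of the i32 holding a lane are unspecified, so the value and the amount are each masked with 65535 before i32.shr_u, which is why each lane costs two i32.and ops here. A per-lane sketch under the same assumptions as above:

; One lane of a scalarized <8 x i16> lshr: mask value and amount
; to their low 16 bits, then shift unsigned in i32.
define void @lshr_lane(ptr %res, i16 %v, i16 %amt) {
  %v32 = zext i16 %v to i32         ; i32.and ..., 65535
  %a32 = zext i16 %amt to i32       ; i32.and ..., 65535
  %s   = lshr i32 %v32, %a32        ; i32.shr_u
  %r   = trunc i32 %s to i16
  store i16 %r, ptr %res            ; i32.store16
  ret void
}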
@@ -8633,30 +7201,22 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: and_v8i16:
; NO-SIMD128: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.and $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.and $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.and $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.and $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.and $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.and $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.and $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.and $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.and $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.and $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.and $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.and $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.and $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.and $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.and $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: and_v8i16:
@@ -8668,24 +7228,16 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = and <8 x i16> %x, %y
ret <8 x i16> %a
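The bitwise ops need no extension or masking at all: i32.and acts on each bit independently and i32.store16 only writes the low 16 bits, so whatever sits in a lane's high bits cannot leak into the result. Each lane is therefore a single ALU op plus a store, as the checks above show. A minimal sketch (hypothetical name):

; One lane of a scalarized <8 x i16> and: no extend or mask needed.
define void @and_lane(ptr %res, i16 %a, i16 %b) {
  %r = and i16 %a, %b               ; i32.and
  store i16 %r, ptr %res            ; i32.store16
  ret void
}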
@@ -8707,30 +7259,22 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: or_v8i16:
; NO-SIMD128: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.or $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.or $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.or $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.or $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.or $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.or $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.or $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.or $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.or $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.or $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.or $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.or $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.or $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.or $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.or $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.or $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: or_v8i16:
@@ -8742,24 +7286,16 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = or <8 x i16> %x, %y
ret <8 x i16> %a
@@ -8781,30 +7317,22 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: xor_v8i16:
; NO-SIMD128: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.xor $push0=, $5, $13
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop0
-; NO-SIMD128-NEXT: i32.xor $push1=, $3, $11
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop1
-; NO-SIMD128-NEXT: i32.xor $push2=, $2, $10
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-NEXT: i32.xor $push3=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: i32.xor $push4=, $8, $16
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.xor $push7=, $7, $15
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push11=, 10
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.xor $push10=, $6, $14
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push14=, 6
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.xor $push13=, $4, $12
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
+; NO-SIMD128-NEXT: i32.xor $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop0
+; NO-SIMD128-NEXT: i32.xor $push1=, $7, $15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop1
+; NO-SIMD128-NEXT: i32.xor $push2=, $6, $14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $5, $13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
+; NO-SIMD128-NEXT: i32.xor $push4=, $4, $12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push5=, $3, $11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
+; NO-SIMD128-NEXT: i32.xor $push6=, $2, $10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
+; NO-SIMD128-NEXT: i32.xor $push7=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: xor_v8i16:
@@ -8816,24 +7344,16 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $12
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $14
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%a = xor <8 x i16> %x, %y
ret <8 x i16> %a
@@ -8856,37 +7376,29 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) {
; NO-SIMD128: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $5, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push23=, -1
-; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop23
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push22=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop22
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push21=, -1
-; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.const $push20=, -1
-; NO-SIMD128-NEXT: i32.xor $push5=, $8, $pop20
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.const $push19=, -1
-; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop19
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, -1
-; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push17=, -1
-; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push15=, -1
+; NO-SIMD128-NEXT: i32.xor $push2=, $7, $pop15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push14=, -1
+; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push13=, -1
+; NO-SIMD128-NEXT: i32.xor $push4=, $5, $pop13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push12=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $4, $pop12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push11=, -1
+; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push10=, -1
+; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push9=, -1
+; NO-SIMD128-NEXT: i32.xor $push8=, $1, $pop9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: not_v8i16:
@@ -8895,35 +7407,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23
+; NO-SIMD128-FAST-NEXT: i32.const $push15=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop15
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop14
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1,
i16 -1, i16 -1, i16 -1, i16 -1>
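Core WebAssembly has no bitwise-not instruction, so the not lowers to i32.xor against an all-ones constant, re-materialized per lane as the $popN constants above; andnot below similarly pairs an i32.xor with -1 and an i32.and, since a fused and-not exists only in the SIMD proposal (v128.andnot), not in the scalar instruction set these NO-SIMD128 checks target. A per-lane sketch (hypothetical name):

; One lane of a scalarized <8 x i16> not: xor with all-ones.
define void @not_lane(ptr %res, i16 %x) {
  %r = xor i16 %x, -1               ; i32.const -1 ; i32.xor
  store i16 %r, ptr %res            ; i32.store16
  ret void
}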
@@ -8948,45 +7452,37 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $13, $pop0
-; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push31=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $11, $pop31
-; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push30=, -1
-; NO-SIMD128-NEXT: i32.xor $push5=, $10, $pop30
-; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push29=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $9, $pop29
-; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push28=, -1
-; NO-SIMD128-NEXT: i32.xor $push9=, $16, $pop28
-; NO-SIMD128-NEXT: i32.and $push10=, $8, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push27=, -1
-; NO-SIMD128-NEXT: i32.xor $push13=, $15, $pop27
-; NO-SIMD128-NEXT: i32.and $push14=, $7, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push26=, -1
-; NO-SIMD128-NEXT: i32.xor $push17=, $14, $pop26
-; NO-SIMD128-NEXT: i32.and $push18=, $6, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push25=, -1
-; NO-SIMD128-NEXT: i32.xor $push21=, $12, $pop25
-; NO-SIMD128-NEXT: i32.and $push22=, $4, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push23=, -1
+; NO-SIMD128-NEXT: i32.xor $push3=, $15, $pop23
+; NO-SIMD128-NEXT: i32.and $push4=, $7, $pop3
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push22=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $14, $pop22
+; NO-SIMD128-NEXT: i32.and $push6=, $6, $pop5
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push21=, -1
+; NO-SIMD128-NEXT: i32.xor $push7=, $13, $pop21
+; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop7
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push20=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $12, $pop20
+; NO-SIMD128-NEXT: i32.and $push10=, $4, $pop9
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push19=, -1
+; NO-SIMD128-NEXT: i32.xor $push11=, $11, $pop19
+; NO-SIMD128-NEXT: i32.and $push12=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push18=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $10, $pop18
+; NO-SIMD128-NEXT: i32.and $push14=, $2, $pop13
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push17=, -1
+; NO-SIMD128-NEXT: i32.xor $push15=, $9, $pop17
+; NO-SIMD128-NEXT: i32.and $push16=, $1, $pop15
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: andnot_v8i16:
@@ -8996,42 +7492,34 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $9, $pop0
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop23
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop22
; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $pop29
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $13, $pop28
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $pop27
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $15, $pop26
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $16, $pop25
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $12, $pop21
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $13, $pop20
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $14, $pop19
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $15, $pop18
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $pop17
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%inv_y = xor <8 x i16> %y,
<i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -9058,62 +7546,54 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-LABEL: bitselect_v8i16:
; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.and $push0=, $16, $8
; NO-SIMD128-NEXT: i32.const $push1=, -1
; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1
; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2
; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 12
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.and $push7=, $15, $7
-; NO-SIMD128-NEXT: i32.const $push47=, -1
-; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop47
-; NO-SIMD128-NEXT: i32.and $push9=, $23, $pop8
-; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 10
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.and $push13=, $14, $6
-; NO-SIMD128-NEXT: i32.const $push46=, -1
-; NO-SIMD128-NEXT: i32.xor $push14=, $6, $pop46
-; NO-SIMD128-NEXT: i32.and $push15=, $22, $pop14
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $7
+; NO-SIMD128-NEXT: i32.const $push39=, -1
+; NO-SIMD128-NEXT: i32.xor $push6=, $7, $pop39
+; NO-SIMD128-NEXT: i32.and $push7=, $23, $pop6
+; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
+; NO-SIMD128-NEXT: i32.and $push9=, $14, $6
+; NO-SIMD128-NEXT: i32.const $push38=, -1
+; NO-SIMD128-NEXT: i32.xor $push10=, $6, $pop38
+; NO-SIMD128-NEXT: i32.and $push11=, $22, $pop10
+; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-NEXT: i32.and $push13=, $13, $5
+; NO-SIMD128-NEXT: i32.const $push37=, -1
+; NO-SIMD128-NEXT: i32.xor $push14=, $5, $pop37
+; NO-SIMD128-NEXT: i32.and $push15=, $21, $pop14
; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15
-; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.and $push19=, $13, $5
-; NO-SIMD128-NEXT: i32.const $push45=, -1
-; NO-SIMD128-NEXT: i32.xor $push20=, $5, $pop45
-; NO-SIMD128-NEXT: i32.and $push21=, $21, $pop20
-; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push27=, 6
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.and $push23=, $12, $4
-; NO-SIMD128-NEXT: i32.const $push44=, -1
-; NO-SIMD128-NEXT: i32.xor $push24=, $4, $pop44
-; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop24
-; NO-SIMD128-NEXT: i32.or $push26=, $pop23, $pop25
-; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.and $push29=, $11, $3
-; NO-SIMD128-NEXT: i32.const $push43=, -1
-; NO-SIMD128-NEXT: i32.xor $push30=, $3, $pop43
-; NO-SIMD128-NEXT: i32.and $push31=, $19, $pop30
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
+; NO-SIMD128-NEXT: i32.and $push17=, $12, $4
+; NO-SIMD128-NEXT: i32.const $push36=, -1
+; NO-SIMD128-NEXT: i32.xor $push18=, $4, $pop36
+; NO-SIMD128-NEXT: i32.and $push19=, $20, $pop18
+; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
+; NO-SIMD128-NEXT: i32.and $push21=, $11, $3
+; NO-SIMD128-NEXT: i32.const $push35=, -1
+; NO-SIMD128-NEXT: i32.xor $push22=, $3, $pop35
+; NO-SIMD128-NEXT: i32.and $push23=, $19, $pop22
+; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
+; NO-SIMD128-NEXT: i32.and $push25=, $10, $2
+; NO-SIMD128-NEXT: i32.const $push34=, -1
+; NO-SIMD128-NEXT: i32.xor $push26=, $2, $pop34
+; NO-SIMD128-NEXT: i32.and $push27=, $18, $pop26
+; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
+; NO-SIMD128-NEXT: i32.and $push29=, $9, $1
+; NO-SIMD128-NEXT: i32.const $push33=, -1
+; NO-SIMD128-NEXT: i32.xor $push30=, $1, $pop33
+; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop30
; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop32
-; NO-SIMD128-NEXT: i32.and $push33=, $10, $2
-; NO-SIMD128-NEXT: i32.const $push42=, -1
-; NO-SIMD128-NEXT: i32.xor $push34=, $2, $pop42
-; NO-SIMD128-NEXT: i32.and $push35=, $18, $pop34
-; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
-; NO-SIMD128-NEXT: i32.and $push37=, $9, $1
-; NO-SIMD128-NEXT: i32.const $push41=, -1
-; NO-SIMD128-NEXT: i32.xor $push38=, $1, $pop41
-; NO-SIMD128-NEXT: i32.and $push39=, $17, $pop38
-; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_v8i16:
@@ -9126,55 +7606,47 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop39
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop38
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop37
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24
-; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42
-; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop18
+; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop35
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop22
+; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $23, $pop26
+; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop33
+; NO-SIMD128-FAST-NEXT: i32.and $push31=, $24, $pop30
; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36
-; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%masked_v1 = and <8 x i16> %v1, %c
%inv_mask = xor <8 x i16>
@@ -9203,46 +7675,38 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2
; NO-SIMD128-LABEL: bitselect_xor_v8i16:
; NO-SIMD128: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push3=, 14
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24
-; NO-SIMD128-NEXT: i32.store16 0($pop4), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.xor $push5=, $15, $23
-; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $7
-; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $23
-; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7
-; NO-SIMD128-NEXT: i32.const $push13=, 10
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22
-; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $6
-; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22
-; NO-SIMD128-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-NEXT: i32.xor $push15=, $13, $21
-; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $5
-; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 6
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20
-; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $4
-; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.xor $push23=, $11, $19
-; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $3
-; NO-SIMD128-NEXT: i32.xor $push25=, $pop24, $19
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop25
-; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18
-; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $2
-; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
-; NO-SIMD128-NEXT: i32.xor $push29=, $9, $17
-; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $1
-; NO-SIMD128-NEXT: i32.xor $push31=, $pop30, $17
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop31
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $15, $23
+; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $7
+; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $23
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
+; NO-SIMD128-NEXT: i32.xor $push6=, $14, $22
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $6
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $22
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push9=, $13, $21
+; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $5
+; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $21
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
+; NO-SIMD128-NEXT: i32.xor $push12=, $12, $20
+; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $4
+; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $20
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
+; NO-SIMD128-NEXT: i32.xor $push15=, $11, $19
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $3
+; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $19
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
+; NO-SIMD128-NEXT: i32.xor $push18=, $10, $18
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $2
+; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
+; NO-SIMD128-NEXT: i32.xor $push21=, $9, $17
+; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $1
+; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $17
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_v8i16:
@@ -9260,34 +7724,26 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $20
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $20
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $13, $21
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $14, $22
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6
-; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $22
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $15, $23
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $16, $24
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8
-; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $20
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $20
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $21
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $21
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $22
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $15, $23
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $23
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $16, $24
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <8 x i16> %v1, %v2
%and = and <8 x i16> %xor1, %c
@@ -9314,62 +7770,54 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x
; NO-SIMD128-LABEL: bitselect_xor_reversed_v8i16:
; NO-SIMD128: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.xor $push2=, $16, $24
; NO-SIMD128-NEXT: i32.const $push0=, -1
; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $24
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 12
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.xor $push8=, $15, $23
-; NO-SIMD128-NEXT: i32.const $push47=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop47
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $23
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 10
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.xor $push14=, $14, $22
-; NO-SIMD128-NEXT: i32.const $push46=, -1
-; NO-SIMD128-NEXT: i32.xor $push13=, $6, $pop46
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push6=, $15, $23
+; NO-SIMD128-NEXT: i32.const $push39=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop39
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $23
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22
+; NO-SIMD128-NEXT: i32.const $push38=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $6, $pop38
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-NEXT: i32.xor $push14=, $13, $21
+; NO-SIMD128-NEXT: i32.const $push37=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $5, $pop37
; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $22
-; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.xor $push20=, $13, $21
-; NO-SIMD128-NEXT: i32.const $push45=, -1
-; NO-SIMD128-NEXT: i32.xor $push19=, $5, $pop45
-; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push27=, 6
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.xor $push24=, $12, $20
-; NO-SIMD128-NEXT: i32.const $push44=, -1
-; NO-SIMD128-NEXT: i32.xor $push23=, $4, $pop44
-; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $20
-; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.xor $push30=, $11, $19
-; NO-SIMD128-NEXT: i32.const $push43=, -1
-; NO-SIMD128-NEXT: i32.xor $push29=, $3, $pop43
+; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $21
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
+; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20
+; NO-SIMD128-NEXT: i32.const $push36=, -1
+; NO-SIMD128-NEXT: i32.xor $push17=, $4, $pop36
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
+; NO-SIMD128-NEXT: i32.xor $push22=, $11, $19
+; NO-SIMD128-NEXT: i32.const $push35=, -1
+; NO-SIMD128-NEXT: i32.xor $push21=, $3, $pop35
+; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $19
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
+; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18
+; NO-SIMD128-NEXT: i32.const $push34=, -1
+; NO-SIMD128-NEXT: i32.xor $push25=, $2, $pop34
+; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
+; NO-SIMD128-NEXT: i32.xor $push30=, $9, $17
+; NO-SIMD128-NEXT: i32.const $push33=, -1
+; NO-SIMD128-NEXT: i32.xor $push29=, $1, $pop33
; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29
-; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $19
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop32
-; NO-SIMD128-NEXT: i32.xor $push34=, $10, $18
-; NO-SIMD128-NEXT: i32.const $push42=, -1
-; NO-SIMD128-NEXT: i32.xor $push33=, $2, $pop42
-; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33
-; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $18
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
-; NO-SIMD128-NEXT: i32.xor $push38=, $9, $17
-; NO-SIMD128-NEXT: i32.const $push41=, -1
-; NO-SIMD128-NEXT: i32.xor $push37=, $1, $pop41
-; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $17
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40
+; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $17
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16:
@@ -9382,55 +7830,47 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x
; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $17
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $10, $18
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop39
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $18
; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $19
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop38
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $19
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $12, $20
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop37
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $20
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $13, $21
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop44
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $14, $22
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop43
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $22
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $15, $23
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $13, $21
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $21
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $14, $22
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop35
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $15, $23
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $23
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $16, $24
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33
; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $16, $24
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop41
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38
+; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <8 x i16> %v1, %v2
%notc = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1,
@@ -9458,46 +7898,38 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-LABEL: extmul_low_s_v8i16:
; NO-SIMD128: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push1=, $5
-; NO-SIMD128-NEXT: i32.extend8_s $push0=, $21
+; NO-SIMD128-NEXT: i32.extend8_s $push1=, $8
+; NO-SIMD128-NEXT: i32.extend8_s $push0=, $24
; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend8_s $push4=, $3
-; NO-SIMD128-NEXT: i32.extend8_s $push3=, $19
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $7
+; NO-SIMD128-NEXT: i32.extend8_s $push3=, $23
; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $18
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $6
+; NO-SIMD128-NEXT: i32.extend8_s $push6=, $22
; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-NEXT: i32.extend8_s $push10=, $1
-; NO-SIMD128-NEXT: i32.extend8_s $push9=, $17
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
+; NO-SIMD128-NEXT: i32.extend8_s $push10=, $5
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $21
; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 14
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $8
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $24
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $4
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $20
; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $7
-; NO-SIMD128-NEXT: i32.extend8_s $push17=, $23
-; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19
-; NO-SIMD128-NEXT: i32.const $push25=, 10
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.extend8_s $push23=, $6
-; NO-SIMD128-NEXT: i32.extend8_s $push22=, $22
-; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.extend8_s $push28=, $4
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $20
-; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27
-; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $3
+; NO-SIMD128-NEXT: i32.extend8_s $push15=, $19
+; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $2
+; NO-SIMD128-NEXT: i32.extend8_s $push18=, $18
+; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
+; NO-SIMD128-NEXT: i32.extend8_s $push22=, $1
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $17
+; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_low_s_v8i16:
@@ -9515,34 +7947,26 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $19
; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $4
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $20
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $21
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $22
-; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23
-; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $24
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $20
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $21
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $22
+; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $23
+; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $24
+; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
; NO-SIMD128-FAST-NEXT: return
%low1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -9572,46 +7996,38 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-LABEL: extmul_high_s_v8i16:
; NO-SIMD128: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push1=, $13
-; NO-SIMD128-NEXT: i32.extend8_s $push0=, $29
+; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
+; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32
; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend8_s $push4=, $11
-; NO-SIMD128-NEXT: i32.extend8_s $push3=, $27
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15
+; NO-SIMD128-NEXT: i32.extend8_s $push3=, $31
; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $10
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $26
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14
+; NO-SIMD128-NEXT: i32.extend8_s $push6=, $30
; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-NEXT: i32.extend8_s $push10=, $9
-; NO-SIMD128-NEXT: i32.extend8_s $push9=, $25
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
+; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $29
; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 14
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $16
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $32
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $28
; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $15
-; NO-SIMD128-NEXT: i32.extend8_s $push17=, $31
-; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19
-; NO-SIMD128-NEXT: i32.const $push25=, 10
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.extend8_s $push23=, $14
-; NO-SIMD128-NEXT: i32.extend8_s $push22=, $30
-; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.extend8_s $push28=, $12
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $28
-; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27
-; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11
+; NO-SIMD128-NEXT: i32.extend8_s $push15=, $27
+; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10
+; NO-SIMD128-NEXT: i32.extend8_s $push18=, $26
+; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
+; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $25
+; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_high_s_v8i16:
@@ -9629,34 +8045,26 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $27
; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $28
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $29
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $30
-; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $31
-; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $16
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $32
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $28
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $13
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $29
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $30
+; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $31
+; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $32
+; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
; NO-SIMD128-FAST-NEXT: return
%high1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -9687,61 +8095,53 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $21, $pop47
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $19, $pop45
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop43
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $17, $pop41
-; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
; NO-SIMD128-NEXT: i32.const $push39=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $24, $pop39
-; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $24, $pop39
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
; NO-SIMD128-NEXT: i32.const $push38=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38
; NO-SIMD128-NEXT: i32.const $push37=, 255
-; NO-SIMD128-NEXT: i32.and $push18=, $23, $pop37
-; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $23, $pop37
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
; NO-SIMD128-NEXT: i32.const $push36=, 255
-; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36
; NO-SIMD128-NEXT: i32.const $push35=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop35
-; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $22, $pop35
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
; NO-SIMD128-NEXT: i32.const $push34=, 255
-; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34
+; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34
; NO-SIMD128-NEXT: i32.const $push33=, 255
-; NO-SIMD128-NEXT: i32.and $push28=, $20, $pop33
-; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.and $push10=, $21, $pop33
+; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push32=, 255
+; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32
+; NO-SIMD128-NEXT: i32.const $push31=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $20, $pop31
+; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push30=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30
+; NO-SIMD128-NEXT: i32.const $push29=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $19, $pop29
+; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push28=, 255
+; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28
+; NO-SIMD128-NEXT: i32.const $push27=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $18, $pop27
+; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push26=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26
+; NO-SIMD128-NEXT: i32.const $push25=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $17, $pop25
+; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_low_u_v8i16:
@@ -9749,60 +8149,52 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop39
; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop45
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop43
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop41
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop39
-; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38
; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop37
-; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36
; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop35
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34
; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop33
-; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop33
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop31
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop29
+; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop27
+; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop25
+; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
; NO-SIMD128-FAST-NEXT: return
%low1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -9833,61 +8225,53 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push2=, $13, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $29, $pop47
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $11, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $27, $pop45
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $10, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $26, $pop43
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $9, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $25, $pop41
-; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $16, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
; NO-SIMD128-NEXT: i32.const $push39=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $32, $pop39
-; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop39
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
; NO-SIMD128-NEXT: i32.const $push38=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $15, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop38
; NO-SIMD128-NEXT: i32.const $push37=, 255
-; NO-SIMD128-NEXT: i32.and $push18=, $31, $pop37
-; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop37
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
; NO-SIMD128-NEXT: i32.const $push36=, 255
-; NO-SIMD128-NEXT: i32.and $push24=, $14, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop36
; NO-SIMD128-NEXT: i32.const $push35=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $30, $pop35
-; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop35
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
; NO-SIMD128-NEXT: i32.const $push34=, 255
-; NO-SIMD128-NEXT: i32.and $push29=, $12, $pop34
+; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop34
; NO-SIMD128-NEXT: i32.const $push33=, 255
-; NO-SIMD128-NEXT: i32.and $push28=, $28, $pop33
-; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop33
+; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push32=, 255
+; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop32
+; NO-SIMD128-NEXT: i32.const $push31=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop31
+; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push30=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop30
+; NO-SIMD128-NEXT: i32.const $push29=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop29
+; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push28=, 255
+; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop28
+; NO-SIMD128-NEXT: i32.const $push27=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop27
+; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push26=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop26
+; NO-SIMD128-NEXT: i32.const $push25=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop25
+; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_high_u_v8i16:
@@ -9895,60 +8279,52 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop39
; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop45
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop43
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop41
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $13, $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $29, $pop39
-; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $14, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop38
; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $30, $pop37
-; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $15, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop36
; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $31, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop35
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop34
; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $32, $pop33
-; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop33
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $13, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $29, $pop31
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $14, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $30, $pop29
+; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $15, $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $31, $pop27
+; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $32, $pop25
+; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
; NO-SIMD128-FAST-NEXT: return
%high1 = shufflevector <16 x i8> %v1, <16 x i8> undef,
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -9979,16 +8355,14 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: add_v4i32:
; NO-SIMD128: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.add $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.add $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.add $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.add $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.add $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.add $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.add $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.add $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: add_v4i32:
@@ -10000,10 +8374,8 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = add <4 x i32> %x, %y
ret <4 x i32> %a
@@ -10025,16 +8397,14 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: sub_v4i32:
; NO-SIMD128: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.sub $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.sub $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.sub $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.sub $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.sub $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.sub $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.sub $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.sub $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: sub_v4i32:
@@ -10046,10 +8416,8 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = sub <4 x i32> %x, %y
ret <4 x i32> %a
@@ -10071,16 +8439,14 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: mul_v4i32:
; NO-SIMD128: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.mul $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.mul $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.mul $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.mul $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.mul $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.mul $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.mul $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.mul $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: mul_v4i32:
@@ -10092,10 +8458,8 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = mul <4 x i32> %x, %y
ret <4 x i32> %a
@@ -10117,20 +8481,18 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: min_s_v4i32:
; NO-SIMD128: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.lt_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.lt_s $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.lt_s $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.lt_s $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.lt_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.lt_s $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.lt_s $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.lt_s $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_s_v4i32:
@@ -10145,11 +8507,9 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.lt_s $push4=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $4, $8
; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = icmp slt <4 x i32> %x, %y
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10172,20 +8532,18 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: min_u_v4i32:
; NO-SIMD128: .functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.lt_u $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.lt_u $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.lt_u $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.lt_u $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.lt_u $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.lt_u $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.lt_u $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.lt_u $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_u_v4i32:
@@ -10200,11 +8558,9 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.lt_u $push4=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: i32.lt_u $push6=, $4, $8
; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = icmp ult <4 x i32> %x, %y
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10227,20 +8583,18 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: max_s_v4i32:
; NO-SIMD128: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.gt_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.gt_s $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.gt_s $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.gt_s $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.gt_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.gt_s $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.gt_s $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.gt_s $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_s_v4i32:
@@ -10255,11 +8609,9 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.gt_s $push4=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $4, $8
; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = icmp sgt <4 x i32> %x, %y
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10282,20 +8634,18 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: max_u_v4i32:
; NO-SIMD128: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.gt_u $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.gt_u $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.gt_u $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.gt_u $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.gt_u $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.gt_u $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.gt_u $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.gt_u $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_u_v4i32:
@@ -10310,11 +8660,9 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.gt_u $push4=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: i32.gt_u $push6=, $4, $8
; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = icmp ugt <4 x i32> %x, %y
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10337,63 +8685,59 @@ define <4 x i32> @abs_v4i32(<4 x i32> %x) {
; NO-SIMD128-LABEL: abs_v4i32:
; NO-SIMD128: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
; NO-SIMD128-NEXT: i32.const $push0=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push21=, $4, $pop0
-; NO-SIMD128-NEXT: local.tee $push20=, $5=, $pop21
-; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop20
+; NO-SIMD128-NEXT: i32.shr_s $push19=, $4, $pop0
+; NO-SIMD128-NEXT: local.tee $push18=, $5=, $pop19
+; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop18
; NO-SIMD128-NEXT: i32.sub $push2=, $pop1, $5
-; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2
-; NO-SIMD128-NEXT: i32.const $push19=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push18=, $3, $pop19
-; NO-SIMD128-NEXT: local.tee $push17=, $4=, $pop18
-; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop17
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push17=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push16=, $3, $pop17
+; NO-SIMD128-NEXT: local.tee $push15=, $4=, $pop16
+; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop15
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop3, $4
+; NO-SIMD128-NEXT: i32.store 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push14=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push13=, $2, $pop14
+; NO-SIMD128-NEXT: local.tee $push12=, $4=, $pop13
+; NO-SIMD128-NEXT: i32.xor $push5=, $2, $pop12
; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $4
-; NO-SIMD128-NEXT: i32.store 8($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push16=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push15=, $2, $pop16
-; NO-SIMD128-NEXT: local.tee $push14=, $4=, $pop15
-; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop14
+; NO-SIMD128-NEXT: i32.store 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push11=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push10=, $1, $pop11
+; NO-SIMD128-NEXT: local.tee $push9=, $4=, $pop10
+; NO-SIMD128-NEXT: i32.xor $push7=, $1, $pop9
; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $4
-; NO-SIMD128-NEXT: i32.store 4($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push13=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push12=, $1, $pop13
-; NO-SIMD128-NEXT: local.tee $push11=, $4=, $pop12
-; NO-SIMD128-NEXT: i32.xor $push9=, $1, $pop11
-; NO-SIMD128-NEXT: i32.sub $push10=, $pop9, $4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop10
+; NO-SIMD128-NEXT: i32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: abs_v4i32:
; NO-SIMD128-FAST: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push20=, $5=, $pop21
-; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push19=, $1, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push18=, $5=, $pop19
+; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop18
; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop1, $5
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $2, $pop19
-; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: local.tee $push15=, $1=, $pop16
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop15
; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop3, $1
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $3, $pop16
-; NO-SIMD128-FAST-NEXT: local.tee $push14=, $2=, $pop15
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $3, $pop14
+; NO-SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop12
; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $2
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $4, $pop13
-; NO-SIMD128-FAST-NEXT: local.tee $push11=, $0=, $pop12
-; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $0
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $4, $pop11
+; NO-SIMD128-FAST-NEXT: local.tee $push9=, $3=, $pop10
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $3
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%a = sub <4 x i32> zeroinitializer, %x
%b = icmp slt <4 x i32> %x, zeroinitializer
@@ -10418,19 +8762,17 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
; NO-SIMD128: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 0
-; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $3
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push9=, 0
-; NO-SIMD128-NEXT: i32.sub $push2=, $pop9, $2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 0
-; NO-SIMD128-NEXT: i32.sub $push3=, $pop8, $1
-; NO-SIMD128-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $4
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
; NO-SIMD128-NEXT: i32.const $push7=, 0
-; NO-SIMD128-NEXT: i32.sub $push4=, $pop7, $4
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
+; NO-SIMD128-NEXT: i32.sub $push2=, $pop7, $3
+; NO-SIMD128-NEXT: i32.store 8($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push6=, 0
+; NO-SIMD128-NEXT: i32.sub $push3=, $pop6, $2
+; NO-SIMD128-NEXT: i32.store 4($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push5=, 0
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop5, $1
+; NO-SIMD128-NEXT: i32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: neg_v4i32:
@@ -10439,17 +8781,15 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0
; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2
+; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop7, $2
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3
+; NO-SIMD128-FAST-NEXT: i32.const $push6=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop6, $3
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop5, $4
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = sub <4 x i32> <i32 0, i32 0, i32 0, i32 0>, %x
ret <4 x i32> %a
@@ -10471,16 +8811,14 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-LABEL: shl_v4i32:
; NO-SIMD128: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shl $push0=, $3, $5
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shl $push1=, $2, $5
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shl $push3=, $4, $5
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shl $push0=, $4, $5
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $3, $5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $2, $5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_v4i32:
@@ -10492,10 +8830,8 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
%s = shufflevector <4 x i32> %t, <4 x i32> undef,
@@ -10523,19 +8859,17 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
; NO-SIMD128: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 5
-; NO-SIMD128-NEXT: i32.shl $push1=, $3, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push9=, 5
-; NO-SIMD128-NEXT: i32.shl $push2=, $2, $pop9
-; NO-SIMD128-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 5
-; NO-SIMD128-NEXT: i32.shl $push3=, $1, $pop8
-; NO-SIMD128-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: i32.shl $push1=, $4, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
; NO-SIMD128-NEXT: i32.const $push7=, 5
-; NO-SIMD128-NEXT: i32.shl $push4=, $4, $pop7
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
+; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push6=, 5
+; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push5=, 5
+; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_const_v4i32:
@@ -10544,17 +8878,15 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5
; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop7
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push6=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop6
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %a
@@ -10606,16 +8938,14 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-LABEL: shl_vec_v4i32:
; NO-SIMD128: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shl $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shl $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shl $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shl $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shl_vec_v4i32:
@@ -10627,10 +8957,8 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = shl <4 x i32> %v, %x
ret <4 x i32> %a
@@ -10652,16 +8980,14 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-LABEL: shr_s_v4i32:
; NO-SIMD128: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $5
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $5
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $5
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $5
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_v4i32:
@@ -10673,10 +8999,8 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
%s = shufflevector <4 x i32> %t, <4 x i32> undef,
@@ -10731,16 +9055,14 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-LABEL: shr_s_vec_v4i32:
; NO-SIMD128: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32:
@@ -10752,10 +9074,8 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = ashr <4 x i32> %v, %x
ret <4 x i32> %a
@@ -10777,16 +9097,14 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-LABEL: shr_u_v4i32:
; NO-SIMD128: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $5
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $5
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $5
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $5
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_v4i32:
@@ -10798,10 +9116,8 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%t = insertelement <4 x i32> undef, i32 %x, i32 0
%s = shufflevector <4 x i32> %t, <4 x i32> undef,
@@ -10856,16 +9172,14 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-LABEL: shr_u_vec_v4i32:
; NO-SIMD128: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32:
@@ -10877,10 +9191,8 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = lshr <4 x i32> %v, %x
ret <4 x i32> %a
@@ -10902,16 +9214,14 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: and_v4i32:
; NO-SIMD128: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.and $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.and $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.and $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.and $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.and $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.and $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.and $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: and_v4i32:
@@ -10923,10 +9233,8 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = and <4 x i32> %x, %y
ret <4 x i32> %a
@@ -10948,16 +9256,14 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: or_v4i32:
; NO-SIMD128: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.or $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.or $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.or $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.or $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.or $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.or $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.or $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.or $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: or_v4i32:
@@ -10969,10 +9275,8 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = or <4 x i32> %x, %y
ret <4 x i32> %a
@@ -10994,16 +9298,14 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: xor_v4i32:
; NO-SIMD128: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.xor $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.xor $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.xor $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.xor $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.xor $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.xor $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.xor $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: xor_v4i32:
@@ -11015,10 +9317,8 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = xor <4 x i32> %x, %y
ret <4 x i32> %a
@@ -11041,19 +9341,17 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) {
; NO-SIMD128: .functype not_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $3, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push9=, -1
-; NO-SIMD128-NEXT: i32.xor $push2=, $2, $pop9
-; NO-SIMD128-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $1, $pop8
-; NO-SIMD128-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
; NO-SIMD128-NEXT: i32.const $push7=, -1
-; NO-SIMD128-NEXT: i32.xor $push4=, $4, $pop7
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
+; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push6=, -1
+; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push5=, -1
+; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: not_v4i32:
@@ -11062,17 +9360,15 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) {
; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop7
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push6=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop6
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %a
@@ -11096,23 +9392,21 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, -1
-; NO-SIMD128-NEXT: i32.xor $push1=, $7, $pop0
-; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop1
-; NO-SIMD128-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push13=, -1
-; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop13
-; NO-SIMD128-NEXT: i32.and $push4=, $2, $pop3
-; NO-SIMD128-NEXT: i32.store 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push12=, -1
-; NO-SIMD128-NEXT: i32.xor $push5=, $5, $pop12
-; NO-SIMD128-NEXT: i32.and $push6=, $1, $pop5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop1
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
; NO-SIMD128-NEXT: i32.const $push11=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $8, $pop11
-; NO-SIMD128-NEXT: i32.and $push8=, $4, $pop7
-; NO-SIMD128-NEXT: i32.store 0($pop10), $pop8
+; NO-SIMD128-NEXT: i32.xor $push3=, $7, $pop11
+; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3
+; NO-SIMD128-NEXT: i32.store 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push10=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $6, $pop10
+; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push9=, -1
+; NO-SIMD128-NEXT: i32.xor $push7=, $5, $pop9
+; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7
+; NO-SIMD128-NEXT: i32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: andnot_v4i32:
@@ -11122,20 +9416,18 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $5, $pop0
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop13
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop11
; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop10
; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $pop11
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop8), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $pop9
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%inv_y = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
%a = and <4 x i32> %x, %inv_y
@@ -11161,32 +9453,30 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
; NO-SIMD128-LABEL: bitselect_v4i32:
; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.const $push1=, -1
; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12
; NO-SIMD128-NEXT: i32.and $push0=, $4, $8
; NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push21=, -1
-; NO-SIMD128-NEXT: i32.xor $push8=, $3, $pop21
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $11
-; NO-SIMD128-NEXT: i32.and $push7=, $3, $7
-; NO-SIMD128-NEXT: i32.or $push10=, $pop9, $pop7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop10
-; NO-SIMD128-NEXT: i32.const $push20=, -1
-; NO-SIMD128-NEXT: i32.xor $push12=, $2, $pop20
-; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $10
-; NO-SIMD128-NEXT: i32.and $push11=, $2, $6
-; NO-SIMD128-NEXT: i32.or $push14=, $pop13, $pop11
-; NO-SIMD128-NEXT: i32.store 4($0), $pop14
+; NO-SIMD128-NEXT: i32.store 12($0), $pop4
; NO-SIMD128-NEXT: i32.const $push19=, -1
-; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop19
-; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $9
-; NO-SIMD128-NEXT: i32.and $push15=, $1, $5
-; NO-SIMD128-NEXT: i32.or $push18=, $pop17, $pop15
-; NO-SIMD128-NEXT: i32.store 0($0), $pop18
+; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop19
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $11
+; NO-SIMD128-NEXT: i32.and $push5=, $3, $7
+; NO-SIMD128-NEXT: i32.or $push8=, $pop7, $pop5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push18=, -1
+; NO-SIMD128-NEXT: i32.xor $push10=, $2, $pop18
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $10
+; NO-SIMD128-NEXT: i32.and $push9=, $2, $6
+; NO-SIMD128-NEXT: i32.or $push12=, $pop11, $pop9
+; NO-SIMD128-NEXT: i32.store 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push17=, -1
+; NO-SIMD128-NEXT: i32.xor $push14=, $1, $pop17
+; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $9
+; NO-SIMD128-NEXT: i32.and $push13=, $1, $5
+; NO-SIMD128-NEXT: i32.or $push16=, $pop15, $pop13
+; NO-SIMD128-NEXT: i32.store 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_v4i32:
@@ -11198,26 +9488,24 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) {
; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5
; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop19
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10
; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6
; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop18
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11
; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7
; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop17
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12
; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8
; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%masked_v1 = and <4 x i32> %c, %v1
%inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c
@@ -11244,24 +9532,22 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2
; NO-SIMD128-LABEL: bitselect_xor_v4i32:
; NO-SIMD128: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
; NO-SIMD128-NEXT: i32.xor $push0=, $8, $12
; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4
; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12
-; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2
-; NO-SIMD128-NEXT: i32.xor $push5=, $7, $11
-; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $3
-; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $11
-; NO-SIMD128-NEXT: i32.store 8($0), $pop7
-; NO-SIMD128-NEXT: i32.xor $push8=, $6, $10
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $2
-; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $10
-; NO-SIMD128-NEXT: i32.store 4($0), $pop10
-; NO-SIMD128-NEXT: i32.xor $push11=, $5, $9
-; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $1
-; NO-SIMD128-NEXT: i32.xor $push13=, $pop12, $9
-; NO-SIMD128-NEXT: i32.store 0($0), $pop13
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
+; NO-SIMD128-NEXT: i32.xor $push3=, $7, $11
+; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $3
+; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $11
+; NO-SIMD128-NEXT: i32.store 8($0), $pop5
+; NO-SIMD128-NEXT: i32.xor $push6=, $6, $10
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $2
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $10
+; NO-SIMD128-NEXT: i32.store 4($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push9=, $5, $9
+; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $1
+; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $9
+; NO-SIMD128-NEXT: i32.store 0($0), $pop11
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_v4i32:
@@ -11279,12 +9565,10 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $8, $12
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $12
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $12
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $12
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <4 x i32> %v1, %v2
%and = and <4 x i32> %xor1, %c
@@ -11311,32 +9595,30 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x
; NO-SIMD128-LABEL: bitselect_xor_reversed_v4i32:
; NO-SIMD128: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
; NO-SIMD128-NEXT: i32.xor $push2=, $8, $12
; NO-SIMD128-NEXT: i32.const $push0=, -1
; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0
; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1
; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $12
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.xor $push8=, $7, $11
-; NO-SIMD128-NEXT: i32.const $push21=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $3, $pop21
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $11
-; NO-SIMD128-NEXT: i32.store 8($0), $pop10
-; NO-SIMD128-NEXT: i32.xor $push12=, $6, $10
-; NO-SIMD128-NEXT: i32.const $push20=, -1
-; NO-SIMD128-NEXT: i32.xor $push11=, $2, $pop20
-; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $pop11
-; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $10
-; NO-SIMD128-NEXT: i32.store 4($0), $pop14
-; NO-SIMD128-NEXT: i32.xor $push16=, $5, $9
+; NO-SIMD128-NEXT: i32.store 12($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push6=, $7, $11
; NO-SIMD128-NEXT: i32.const $push19=, -1
-; NO-SIMD128-NEXT: i32.xor $push15=, $1, $pop19
-; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $pop15
-; NO-SIMD128-NEXT: i32.xor $push18=, $pop17, $9
-; NO-SIMD128-NEXT: i32.store 0($0), $pop18
+; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop19
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $11
+; NO-SIMD128-NEXT: i32.store 8($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push10=, $6, $10
+; NO-SIMD128-NEXT: i32.const $push18=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $2, $pop18
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $10
+; NO-SIMD128-NEXT: i32.store 4($0), $pop12
+; NO-SIMD128-NEXT: i32.xor $push14=, $5, $9
+; NO-SIMD128-NEXT: i32.const $push17=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $1, $pop17
+; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $9
+; NO-SIMD128-NEXT: i32.store 0($0), $pop16
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32:
@@ -11349,25 +9631,23 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x
; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $9
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4
; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop19
; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5
; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $10
; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8
; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $7, $11
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop18
; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9
; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $11
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $8, $12
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop19
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop17
; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13
; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $12
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16
; NO-SIMD128-FAST-NEXT: return
%xor1 = xor <4 x i32> %v1, %v2
%notc = xor <4 x i32> %c, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -11394,24 +9674,22 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-LABEL: extmul_low_s_v4i32:
; NO-SIMD128: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push1=, $3
-; NO-SIMD128-NEXT: i32.extend16_s $push0=, $11
+; NO-SIMD128-NEXT: i32.extend16_s $push1=, $4
+; NO-SIMD128-NEXT: i32.extend16_s $push0=, $12
; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend16_s $push4=, $2
-; NO-SIMD128-NEXT: i32.extend16_s $push3=, $10
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $3
+; NO-SIMD128-NEXT: i32.extend16_s $push3=, $11
; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $9
+; NO-SIMD128-NEXT: i32.store 8($0), $pop5
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $2
+; NO-SIMD128-NEXT: i32.extend16_s $push6=, $10
; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 12
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.extend16_s $push10=, $4
-; NO-SIMD128-NEXT: i32.extend16_s $push9=, $12
+; NO-SIMD128-NEXT: i32.store 4($0), $pop8
+; NO-SIMD128-NEXT: i32.extend16_s $push10=, $1
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $9
; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11
+; NO-SIMD128-NEXT: i32.store 0($0), $pop11
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_low_s_v4i32:
@@ -11429,12 +9707,10 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $11
; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $4
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $12
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $12
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11
; NO-SIMD128-FAST-NEXT: return
%low1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -11464,24 +9740,22 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-LABEL: extmul_high_s_v4i32:
; NO-SIMD128: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push1=, $7
-; NO-SIMD128-NEXT: i32.extend16_s $push0=, $15
+; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8
+; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16
; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend16_s $push4=, $6
-; NO-SIMD128-NEXT: i32.extend16_s $push3=, $14
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7
+; NO-SIMD128-NEXT: i32.extend16_s $push3=, $15
; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $13
+; NO-SIMD128-NEXT: i32.store 8($0), $pop5
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6
+; NO-SIMD128-NEXT: i32.extend16_s $push6=, $14
; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 12
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.extend16_s $push10=, $8
-; NO-SIMD128-NEXT: i32.extend16_s $push9=, $16
+; NO-SIMD128-NEXT: i32.store 4($0), $pop8
+; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $13
; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11
+; NO-SIMD128-NEXT: i32.store 0($0), $pop11
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_high_s_v4i32:
@@ -11499,12 +9773,10 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $15
; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $16
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $16
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11
; NO-SIMD128-FAST-NEXT: return
%high1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -11535,31 +9807,29 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop0
-; NO-SIMD128-NEXT: i32.const $push21=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $11, $pop21
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push20=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop20
+; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop0
; NO-SIMD128-NEXT: i32.const $push19=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $10, $pop19
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store 4($0), $pop6
+; NO-SIMD128-NEXT: i32.and $push1=, $12, $pop19
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-NEXT: i32.const $push18=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop18
+; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop18
; NO-SIMD128-NEXT: i32.const $push17=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop17
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store 0($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 12
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
+; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop17
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store 8($0), $pop6
; NO-SIMD128-NEXT: i32.const $push16=, 65535
-; NO-SIMD128-NEXT: i32.and $push11=, $4, $pop16
+; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop16
; NO-SIMD128-NEXT: i32.const $push15=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop15
+; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop15
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store 4($0), $pop9
+; NO-SIMD128-NEXT: i32.const $push14=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop14
+; NO-SIMD128-NEXT: i32.const $push13=, 65535
+; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop13
; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12
+; NO-SIMD128-NEXT: i32.store 0($0), $pop12
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_low_u_v4i32:
@@ -11567,30 +9837,28 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop19
; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop19
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop18
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop18
; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop17
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop17
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop16
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop16
; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop15
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop15
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop13
; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12
; NO-SIMD128-FAST-NEXT: return
%low1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
<4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -11621,31 +9889,29 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push2=, $7, $pop0
-; NO-SIMD128-NEXT: i32.const $push21=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $15, $pop21
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push20=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop20
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
; NO-SIMD128-NEXT: i32.const $push19=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $14, $pop19
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store 4($0), $pop6
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop19
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store 12($0), $pop3
; NO-SIMD128-NEXT: i32.const $push18=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop18
+; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop18
; NO-SIMD128-NEXT: i32.const $push17=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop17
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store 0($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push13=, 12
-; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop17
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store 8($0), $pop6
; NO-SIMD128-NEXT: i32.const $push16=, 65535
-; NO-SIMD128-NEXT: i32.and $push11=, $8, $pop16
+; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop16
; NO-SIMD128-NEXT: i32.const $push15=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $16, $pop15
+; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop15
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store 4($0), $pop9
+; NO-SIMD128-NEXT: i32.const $push14=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop14
+; NO-SIMD128-NEXT: i32.const $push13=, 65535
+; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop13
; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12
+; NO-SIMD128-NEXT: i32.store 0($0), $pop12
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: extmul_high_u_v4i32:
@@ -11653,30 +9919,28 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) {
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
; NO-SIMD128-FAST-NEXT: i32.and $push2=, $5, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop19
; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop19
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop18
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop18
; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop17
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop17
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6
; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop16
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop16
; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop15
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop15
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop13
; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12
; NO-SIMD128-FAST-NEXT: return
%high1 = shufflevector <8 x i16> %v1, <8 x i16> undef,
<4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -13061,16 +11325,14 @@ define <4 x float> @neg_v4f32(<4 x float> %x) {
; NO-SIMD128-LABEL: neg_v4f32:
; NO-SIMD128: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.neg $push0=, $3
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.neg $push1=, $2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.neg $push2=, $1
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: f32.neg $push5=, $4
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: f32.neg $push0=, $4
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.neg $push1=, $3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.neg $push2=, $2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.neg $push3=, $1
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: neg_v4f32:
@@ -13082,10 +11344,8 @@ define <4 x float> @neg_v4f32(<4 x float> %x) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.neg $push3=, $4
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = fsub nsz <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, %x
ret <4 x float> %a
@@ -13108,16 +11368,14 @@ define <4 x float> @abs_v4f32(<4 x float> %x) {
; NO-SIMD128-LABEL: abs_v4f32:
; NO-SIMD128: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.abs $push0=, $3
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.abs $push1=, $2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.abs $push2=, $1
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: f32.abs $push5=, $4
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: f32.abs $push0=, $4
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.abs $push1=, $3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.abs $push2=, $2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.abs $push3=, $1
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: abs_v4f32:
@@ -13129,10 +11387,8 @@ define <4 x float> @abs_v4f32(<4 x float> %x) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.abs $push3=, $4
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
ret <4 x float> %a
@@ -13157,54 +11413,50 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.gt $push1=, $3, $pop17
-; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1
-; NO-SIMD128-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.gt $push3=, $2, $pop15
-; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.gt $push1=, $4, $pop15
+; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1
+; NO-SIMD128-NEXT: f32.store 12($0), $pop2
; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.gt $push5=, $1, $pop13
-; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-NEXT: f32.gt $push3=, $3, $pop13
+; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.gt $push7=, $4, $pop11
-; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-NEXT: f32.gt $push5=, $2, $pop11
+; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5
+; NO-SIMD128-NEXT: f32.store 4($0), $pop6
+; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.gt $push7=, $1, $pop9
+; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_unordered_v4f32:
; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop17
+; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop15
; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop15
-; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop13
-; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop13
+; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop11
+; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
+; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
%a = select <4 x i1> %cmps, <4 x float> %x,
@@ -13231,54 +11483,50 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.lt $push1=, $3, $pop17
-; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1
-; NO-SIMD128-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.lt $push3=, $2, $pop15
-; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.lt $push1=, $4, $pop15
+; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1
+; NO-SIMD128-NEXT: f32.store 12($0), $pop2
; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.lt $push5=, $1, $pop13
-; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-NEXT: f32.lt $push3=, $3, $pop13
+; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.lt $push7=, $4, $pop11
-; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-NEXT: f32.lt $push5=, $2, $pop11
+; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5
+; NO-SIMD128-NEXT: f32.store 4($0), $pop6
+; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.lt $push7=, $1, $pop9
+; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_unordered_v4f32:
; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop17
+; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop15
; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop15
-; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop13
-; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop13
+; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop11
+; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
+; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
%a = select <4 x i1> %cmps, <4 x float> %x,
@@ -13305,54 +11553,50 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.ge $push1=, $3, $pop17
-; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1
-; NO-SIMD128-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.ge $push3=, $2, $pop15
-; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.ge $push1=, $4, $pop15
+; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1
+; NO-SIMD128-NEXT: f32.store 12($0), $pop2
; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.ge $push5=, $1, $pop13
-; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-NEXT: f32.ge $push3=, $3, $pop13
+; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.ge $push7=, $4, $pop11
-; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-NEXT: f32.ge $push5=, $2, $pop11
+; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5
+; NO-SIMD128-NEXT: f32.store 4($0), $pop6
+; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.ge $push7=, $1, $pop9
+; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_ordered_v4f32:
; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop17
+; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop15
; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop15
-; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop13
-; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop13
+; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop11
+; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
+; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
%a = select <4 x i1> %cmps,
@@ -13379,54 +11623,50 @@ define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.le $push1=, $3, $pop17
-; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1
-; NO-SIMD128-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.le $push3=, $2, $pop15
-; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.le $push1=, $4, $pop15
+; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1
+; NO-SIMD128-NEXT: f32.store 12($0), $pop2
; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.le $push5=, $1, $pop13
-; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-NEXT: f32.le $push3=, $3, $pop13
+; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop4
; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-NEXT: f32.le $push7=, $4, $pop11
-; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-NEXT: f32.le $push5=, $2, $pop11
+; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5
+; NO-SIMD128-NEXT: f32.store 4($0), $pop6
+; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-NEXT: f32.le $push7=, $1, $pop9
+; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7
+; NO-SIMD128-NEXT: f32.store 0($0), $pop8
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_ordered_v4f32:
; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-FAST-NEXT: # %bb.0:
; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop17
+; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop15
; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop15
-; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3
-; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop13
-; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5
-; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
+; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop13
+; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4
; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2
; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2
-; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop11
+; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6
+; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2
+; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8
; NO-SIMD128-FAST-NEXT: return
%cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
%a = select <4 x i1> %cmps,
@@ -13451,16 +11691,14 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: min_intrinsic_v4f32:
; NO-SIMD128: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.min $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.min $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.min $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.min $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.min $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.min $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.min $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.min $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32:
@@ -13472,10 +11710,8 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -13552,16 +11788,14 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32:
@@ -13573,10 +11807,8 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -13598,16 +11830,14 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32:
@@ -13619,10 +11849,8 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -13647,19 +11875,17 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-NEXT: call $push2=, fminf, $2, $pop9
-; NO-SIMD128-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-NEXT: call $push3=, fminf, $1, $pop8
-; NO-SIMD128-NEXT: f32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
+; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-NEXT: call $push6=, fminf, $4, $pop7
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop6
+; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0
+; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop4
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic:
@@ -13668,17 +11894,15 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0
; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0
; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop9
+; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop8
+; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push6=, fminf, $4, $pop7
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float -1.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -13755,19 +11979,17 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop9
-; NO-SIMD128-NEXT: f32.store 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-NEXT: call $push7=, fminf, $4, $pop8
-; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3
+; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop5
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic:
@@ -13779,14 +12001,12 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push7=, fminf, $4, $pop8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -13809,16 +12029,14 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: max_intrinsic_v4f32:
; NO-SIMD128: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.max $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.max $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.max $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.max $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.max $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.max $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.max $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.max $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32:
@@ -13830,10 +12048,8 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -13910,16 +12126,14 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32:
@@ -13931,10 +12145,8 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -13956,16 +12168,14 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32:
; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32:
@@ -13977,10 +12187,8 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %a
@@ -14057,19 +12265,17 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9
-; NO-SIMD128-NEXT: f32.store 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8
-; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0
+; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
+; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop5
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32:
@@ -14081,14 +12287,12 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0
; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 0.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -14113,19 +12317,17 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0
-; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.const $push2=, 0x1p0
-; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9
-; NO-SIMD128-NEXT: f32.store 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8
-; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop2
+; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0
+; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3
+; NO-SIMD128-NEXT: f32.store 4($0), $pop4
+; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop5
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32:
@@ -14137,14 +12339,12 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float>
; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0
; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9
+; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5
-; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0
-; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7
+; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0
+; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5
; NO-SIMD128-FAST-NEXT: return
%a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float><float -1.0, float 1.0, float -1.0, float -1.0>)
ret <4 x float> %a
@@ -14240,20 +12440,18 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: pmin_v4f32:
; NO-SIMD128: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.lt $push0=, $7, $3
-; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.lt $push2=, $6, $2
-; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-NEXT: f32.lt $push4=, $5, $1
-; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: f32.lt $push6=, $8, $4
-; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6
-; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: pmin_v4f32:
@@ -14268,11 +12466,9 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3
; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4
; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = fcmp olt <4 x float> %y, %x
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
@@ -14295,28 +12491,26 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: pmin_int_v4f32:
; NO-SIMD128: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $8
; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $4
; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $7
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $3
-; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8
-; NO-SIMD128-NEXT: i32.store 8($0), $pop9
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $6
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $2
-; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12
-; NO-SIMD128-NEXT: i32.store 4($0), $pop13
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $5
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $1
-; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14
-; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16
-; NO-SIMD128-NEXT: i32.store 0($0), $pop17
+; NO-SIMD128-NEXT: i32.store 12($0), $pop3
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $7
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $3
+; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6
+; NO-SIMD128-NEXT: i32.store 8($0), $pop7
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $6
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $2
+; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10
+; NO-SIMD128-NEXT: i32.store 4($0), $pop11
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $5
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $1
+; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12
+; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14
+; NO-SIMD128-NEXT: i32.store 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: pmin_int_v4f32:
@@ -14337,13 +12531,11 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $8
; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $4
; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%fx = bitcast <4 x i32> %x to <4 x float>
%fy = bitcast <4 x i32> %y to <4 x float>
@@ -14368,20 +12560,18 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: pmax_v4f32:
; NO-SIMD128: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.lt $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0
-; NO-SIMD128-NEXT: f32.store 8($0), $pop1
-; NO-SIMD128-NEXT: f32.lt $push2=, $2, $6
-; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop3
-; NO-SIMD128-NEXT: f32.lt $push4=, $1, $5
-; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4
-; NO-SIMD128-NEXT: f32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: f32.lt $push6=, $4, $8
-; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6
-; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: pmax_v4f32:
@@ -14396,11 +12586,9 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8
; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
; NO-SIMD128-FAST-NEXT: return
%c = fcmp olt <4 x float> %x, %y
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
@@ -14423,28 +12611,26 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: pmax_int_v4f32:
; NO-SIMD128: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $4
; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $8
; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0
; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $3
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $7
-; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8
-; NO-SIMD128-NEXT: i32.store 8($0), $pop9
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $2
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $6
-; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12
-; NO-SIMD128-NEXT: i32.store 4($0), $pop13
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $1
-; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $5
-; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14
-; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16
-; NO-SIMD128-NEXT: i32.store 0($0), $pop17
+; NO-SIMD128-NEXT: i32.store 12($0), $pop3
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $3
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $7
+; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6
+; NO-SIMD128-NEXT: i32.store 8($0), $pop7
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $2
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $6
+; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10
+; NO-SIMD128-NEXT: i32.store 4($0), $pop11
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $1
+; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $5
+; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12
+; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14
+; NO-SIMD128-NEXT: i32.store 0($0), $pop15
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: pmax_int_v4f32:
@@ -14465,13 +12651,11 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8
; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10
; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $4
; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $8
; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12
; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15
; NO-SIMD128-FAST-NEXT: return
%fx = bitcast <4 x i32> %x to <4 x float>
%fy = bitcast <4 x i32> %y to <4 x float>
@@ -14496,16 +12680,14 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: add_v4f32:
; NO-SIMD128: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.add $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.add $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.add $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.add $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.add $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.add $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.add $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.add $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: add_v4f32:
@@ -14517,10 +12699,8 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.add $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = fadd <4 x float> %x, %y
ret <4 x float> %a
@@ -14542,16 +12722,14 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: sub_v4f32:
; NO-SIMD128: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.sub $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.sub $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.sub $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.sub $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.sub $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.sub $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.sub $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.sub $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: sub_v4f32:
@@ -14563,10 +12741,8 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.sub $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = fsub <4 x float> %x, %y
ret <4 x float> %a
@@ -14588,16 +12764,14 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: div_v4f32:
; NO-SIMD128: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.div $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.div $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.div $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.div $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.div $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.div $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.div $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.div $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: div_v4f32:
@@ -14609,10 +12783,8 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.div $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = fdiv <4 x float> %x, %y
ret <4 x float> %a
@@ -14634,16 +12806,14 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: mul_v4f32:
; NO-SIMD128: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.mul $push0=, $3, $7
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.mul $push1=, $2, $6
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.mul $push2=, $1, $5
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: f32.mul $push3=, $4, $8
-; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: f32.mul $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.mul $push1=, $3, $7
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.mul $push2=, $2, $6
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.mul $push3=, $1, $5
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: mul_v4f32:
@@ -14655,10 +12825,8 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.mul $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = fmul <4 x float> %x, %y
ret <4 x float> %a
@@ -14681,16 +12849,14 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) {
; NO-SIMD128-LABEL: sqrt_v4f32:
; NO-SIMD128: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: f32.sqrt $push0=, $3
-; NO-SIMD128-NEXT: f32.store 8($0), $pop0
-; NO-SIMD128-NEXT: f32.sqrt $push1=, $2
-; NO-SIMD128-NEXT: f32.store 4($0), $pop1
-; NO-SIMD128-NEXT: f32.sqrt $push2=, $1
-; NO-SIMD128-NEXT: f32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-NEXT: f32.sqrt $push5=, $4
-; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-NEXT: f32.sqrt $push0=, $4
+; NO-SIMD128-NEXT: f32.store 12($0), $pop0
+; NO-SIMD128-NEXT: f32.sqrt $push1=, $3
+; NO-SIMD128-NEXT: f32.store 8($0), $pop1
+; NO-SIMD128-NEXT: f32.sqrt $push2=, $2
+; NO-SIMD128-NEXT: f32.store 4($0), $pop2
+; NO-SIMD128-NEXT: f32.sqrt $push3=, $1
+; NO-SIMD128-NEXT: f32.store 0($0), $pop3
; NO-SIMD128-NEXT: return
;
; NO-SIMD128-FAST-LABEL: sqrt_v4f32:
@@ -14702,10 +12868,8 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) {
; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1
; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3
; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4
-; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: f32.sqrt $push3=, $4
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3
; NO-SIMD128-FAST-NEXT: return
%a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
ret <4 x float> %a
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
index d2a38de..5ec9f6a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -38,44 +38,22 @@ define <16 x i8> @splat_v16i8(i8 %x) {
; NO-SIMD128-LABEL: splat_v16i8:
; NO-SIMD128: .functype splat_v16i8 (i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $1
+; NO-SIMD128-NEXT: i32.store8 14($0), $1
+; NO-SIMD128-NEXT: i32.store8 13($0), $1
+; NO-SIMD128-NEXT: i32.store8 12($0), $1
+; NO-SIMD128-NEXT: i32.store8 11($0), $1
+; NO-SIMD128-NEXT: i32.store8 10($0), $1
+; NO-SIMD128-NEXT: i32.store8 9($0), $1
; NO-SIMD128-NEXT: i32.store8 8($0), $1
+; NO-SIMD128-NEXT: i32.store8 7($0), $1
+; NO-SIMD128-NEXT: i32.store8 6($0), $1
+; NO-SIMD128-NEXT: i32.store8 5($0), $1
; NO-SIMD128-NEXT: i32.store8 4($0), $1
+; NO-SIMD128-NEXT: i32.store8 3($0), $1
; NO-SIMD128-NEXT: i32.store8 2($0), $1
; NO-SIMD128-NEXT: i32.store8 1($0), $1
; NO-SIMD128-NEXT: i32.store8 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $1
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $1
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $1
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $1
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $1
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $1
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $1
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $1
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $1
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $1
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $1
; NO-SIMD128-NEXT: return
%v = insertelement <16 x i8> undef, i8 %x, i32 0
%res = shufflevector <16 x i8> %v, <16 x i8> undef,
@@ -356,44 +334,22 @@ define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-LABEL: replace_v16i8:
; NO-SIMD128: .functype replace_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $16
+; NO-SIMD128-NEXT: i32.store8 14($0), $15
+; NO-SIMD128-NEXT: i32.store8 13($0), $14
+; NO-SIMD128-NEXT: i32.store8 12($0), $13
+; NO-SIMD128-NEXT: i32.store8 11($0), $17
+; NO-SIMD128-NEXT: i32.store8 10($0), $11
+; NO-SIMD128-NEXT: i32.store8 9($0), $10
; NO-SIMD128-NEXT: i32.store8 8($0), $9
+; NO-SIMD128-NEXT: i32.store8 7($0), $8
+; NO-SIMD128-NEXT: i32.store8 6($0), $7
+; NO-SIMD128-NEXT: i32.store8 5($0), $6
; NO-SIMD128-NEXT: i32.store8 4($0), $5
+; NO-SIMD128-NEXT: i32.store8 3($0), $4
; NO-SIMD128-NEXT: i32.store8 2($0), $3
; NO-SIMD128-NEXT: i32.store8 1($0), $2
; NO-SIMD128-NEXT: i32.store8 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $16
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $15
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $14
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $13
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $17
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $11
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $10
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $8
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $7
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $6
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $4
; NO-SIMD128-NEXT: return
%res = insertelement <16 x i8> %v, i8 %x, i32 11
ret <16 x i8> %res
@@ -461,44 +417,22 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) {
; NO-SIMD128-LABEL: replace_zero_v16i8:
; NO-SIMD128: .functype replace_zero_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $16
+; NO-SIMD128-NEXT: i32.store8 14($0), $15
+; NO-SIMD128-NEXT: i32.store8 13($0), $14
+; NO-SIMD128-NEXT: i32.store8 12($0), $13
+; NO-SIMD128-NEXT: i32.store8 11($0), $12
+; NO-SIMD128-NEXT: i32.store8 10($0), $11
+; NO-SIMD128-NEXT: i32.store8 9($0), $10
; NO-SIMD128-NEXT: i32.store8 8($0), $9
+; NO-SIMD128-NEXT: i32.store8 7($0), $8
+; NO-SIMD128-NEXT: i32.store8 6($0), $7
+; NO-SIMD128-NEXT: i32.store8 5($0), $6
; NO-SIMD128-NEXT: i32.store8 4($0), $5
+; NO-SIMD128-NEXT: i32.store8 3($0), $4
; NO-SIMD128-NEXT: i32.store8 2($0), $3
; NO-SIMD128-NEXT: i32.store8 1($0), $2
; NO-SIMD128-NEXT: i32.store8 0($0), $17
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $16
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $15
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $14
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $13
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $12
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $11
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $10
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $8
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $7
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $6
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $4
; NO-SIMD128-NEXT: return
%res = insertelement <16 x i8> %v, i8 %x, i32 0
ret <16 x i8> %res
@@ -514,44 +448,22 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: shuffle_v16i8:
; NO-SIMD128: .functype shuffle_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $32
+; NO-SIMD128-NEXT: i32.store8 14($0), $15
+; NO-SIMD128-NEXT: i32.store8 13($0), $30
+; NO-SIMD128-NEXT: i32.store8 12($0), $13
+; NO-SIMD128-NEXT: i32.store8 11($0), $28
+; NO-SIMD128-NEXT: i32.store8 10($0), $11
+; NO-SIMD128-NEXT: i32.store8 9($0), $26
; NO-SIMD128-NEXT: i32.store8 8($0), $9
+; NO-SIMD128-NEXT: i32.store8 7($0), $24
+; NO-SIMD128-NEXT: i32.store8 6($0), $7
+; NO-SIMD128-NEXT: i32.store8 5($0), $22
; NO-SIMD128-NEXT: i32.store8 4($0), $5
+; NO-SIMD128-NEXT: i32.store8 3($0), $20
; NO-SIMD128-NEXT: i32.store8 2($0), $3
; NO-SIMD128-NEXT: i32.store8 1($0), $18
; NO-SIMD128-NEXT: i32.store8 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $32
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $15
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $30
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $13
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $28
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $11
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $26
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $24
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $7
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $22
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $20
; NO-SIMD128-NEXT: return
%res = shufflevector <16 x i8> %x, <16 x i8> %y,
<16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23,
@@ -569,44 +481,22 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v16i8:
; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $2
+; NO-SIMD128-NEXT: i32.store8 14($0), $2
+; NO-SIMD128-NEXT: i32.store8 13($0), $2
+; NO-SIMD128-NEXT: i32.store8 12($0), $2
+; NO-SIMD128-NEXT: i32.store8 11($0), $2
+; NO-SIMD128-NEXT: i32.store8 10($0), $2
+; NO-SIMD128-NEXT: i32.store8 9($0), $2
; NO-SIMD128-NEXT: i32.store8 8($0), $2
+; NO-SIMD128-NEXT: i32.store8 7($0), $2
+; NO-SIMD128-NEXT: i32.store8 6($0), $2
+; NO-SIMD128-NEXT: i32.store8 5($0), $2
; NO-SIMD128-NEXT: i32.store8 4($0), $2
+; NO-SIMD128-NEXT: i32.store8 3($0), $2
; NO-SIMD128-NEXT: i32.store8 2($0), $2
; NO-SIMD128-NEXT: i32.store8 1($0), $2
; NO-SIMD128-NEXT: i32.store8 0($0), $2
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $2
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $2
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $2
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $2
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $2
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $2
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $2
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $2
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $2
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $2
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <16 x i8> %x, <16 x i8> %y,
<16 x i32> <i32 1, i32 undef, i32 undef, i32 undef,
@@ -641,44 +531,22 @@ define <16 x i8> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3,
; NO-SIMD128-LABEL: build_v16i8:
; NO-SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store8 15($0), $16
+; NO-SIMD128-NEXT: i32.store8 14($0), $15
+; NO-SIMD128-NEXT: i32.store8 13($0), $14
+; NO-SIMD128-NEXT: i32.store8 12($0), $13
+; NO-SIMD128-NEXT: i32.store8 11($0), $12
+; NO-SIMD128-NEXT: i32.store8 10($0), $11
+; NO-SIMD128-NEXT: i32.store8 9($0), $10
; NO-SIMD128-NEXT: i32.store8 8($0), $9
+; NO-SIMD128-NEXT: i32.store8 7($0), $8
+; NO-SIMD128-NEXT: i32.store8 6($0), $7
+; NO-SIMD128-NEXT: i32.store8 5($0), $6
; NO-SIMD128-NEXT: i32.store8 4($0), $5
+; NO-SIMD128-NEXT: i32.store8 3($0), $4
; NO-SIMD128-NEXT: i32.store8 2($0), $3
; NO-SIMD128-NEXT: i32.store8 1($0), $2
; NO-SIMD128-NEXT: i32.store8 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 15
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store8 0($pop1), $16
-; NO-SIMD128-NEXT: i32.const $push2=, 14
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop3), $15
-; NO-SIMD128-NEXT: i32.const $push4=, 13
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $14
-; NO-SIMD128-NEXT: i32.const $push6=, 12
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $13
-; NO-SIMD128-NEXT: i32.const $push8=, 11
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop9), $12
-; NO-SIMD128-NEXT: i32.const $push10=, 10
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $11
-; NO-SIMD128-NEXT: i32.const $push12=, 9
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $10
-; NO-SIMD128-NEXT: i32.const $push14=, 7
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop15), $8
-; NO-SIMD128-NEXT: i32.const $push16=, 6
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $7
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $6
-; NO-SIMD128-NEXT: i32.const $push20=, 3
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop21), $4
; NO-SIMD128-NEXT: return
i8 %x4, i8 %x5, i8 %x6, i8 %x7,
i8 %x8, i8 %x9, i8 %x10, i8 %x11,
@@ -734,22 +602,14 @@ define <8 x i16> @splat_v8i16(i16 %x) {
; NO-SIMD128-LABEL: splat_v8i16:
; NO-SIMD128: .functype splat_v8i16 (i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $1
+; NO-SIMD128-NEXT: i32.store16 12($0), $1
+; NO-SIMD128-NEXT: i32.store16 10($0), $1
; NO-SIMD128-NEXT: i32.store16 8($0), $1
+; NO-SIMD128-NEXT: i32.store16 6($0), $1
; NO-SIMD128-NEXT: i32.store16 4($0), $1
; NO-SIMD128-NEXT: i32.store16 2($0), $1
; NO-SIMD128-NEXT: i32.store16 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $1
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $1
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $1
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $1
; NO-SIMD128-NEXT: return
%v = insertelement <8 x i16> undef, i16 %x, i32 0
%res = shufflevector <8 x i16> %v, <8 x i16> undef,
@@ -1016,22 +876,14 @@ define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-LABEL: replace_v8i16:
; NO-SIMD128: .functype replace_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $9
+; NO-SIMD128-NEXT: i32.store16 12($0), $7
+; NO-SIMD128-NEXT: i32.store16 10($0), $6
; NO-SIMD128-NEXT: i32.store16 8($0), $5
+; NO-SIMD128-NEXT: i32.store16 6($0), $4
; NO-SIMD128-NEXT: i32.store16 4($0), $3
; NO-SIMD128-NEXT: i32.store16 2($0), $2
; NO-SIMD128-NEXT: i32.store16 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $9
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $7
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $6
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $4
; NO-SIMD128-NEXT: return
%res = insertelement <8 x i16> %v, i16 %x, i32 7
ret <8 x i16> %res
@@ -1095,22 +947,14 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) {
; NO-SIMD128-LABEL: replace_zero_v8i16:
; NO-SIMD128: .functype replace_zero_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $8
+; NO-SIMD128-NEXT: i32.store16 12($0), $7
+; NO-SIMD128-NEXT: i32.store16 10($0), $6
; NO-SIMD128-NEXT: i32.store16 8($0), $5
+; NO-SIMD128-NEXT: i32.store16 6($0), $4
; NO-SIMD128-NEXT: i32.store16 4($0), $3
; NO-SIMD128-NEXT: i32.store16 2($0), $2
; NO-SIMD128-NEXT: i32.store16 0($0), $9
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $8
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $7
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $6
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $4
; NO-SIMD128-NEXT: return
%res = insertelement <8 x i16> %v, i16 %x, i32 0
ret <8 x i16> %res
@@ -1126,22 +970,14 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: shuffle_v8i16:
; NO-SIMD128: .functype shuffle_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $16
+; NO-SIMD128-NEXT: i32.store16 12($0), $7
+; NO-SIMD128-NEXT: i32.store16 10($0), $14
; NO-SIMD128-NEXT: i32.store16 8($0), $5
+; NO-SIMD128-NEXT: i32.store16 6($0), $12
; NO-SIMD128-NEXT: i32.store16 4($0), $3
; NO-SIMD128-NEXT: i32.store16 2($0), $10
; NO-SIMD128-NEXT: i32.store16 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $16
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $7
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $14
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $12
; NO-SIMD128-NEXT: return
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
<8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
@@ -1158,22 +994,14 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v8i16:
; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $2
+; NO-SIMD128-NEXT: i32.store16 12($0), $2
+; NO-SIMD128-NEXT: i32.store16 10($0), $2
; NO-SIMD128-NEXT: i32.store16 8($0), $2
+; NO-SIMD128-NEXT: i32.store16 6($0), $2
; NO-SIMD128-NEXT: i32.store16 4($0), $2
; NO-SIMD128-NEXT: i32.store16 2($0), $2
; NO-SIMD128-NEXT: i32.store16 0($0), $2
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $2
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $2
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $2
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef,
@@ -1198,22 +1026,14 @@ define <8 x i16> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3,
; NO-SIMD128-LABEL: build_v8i16:
; NO-SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store16 14($0), $8
+; NO-SIMD128-NEXT: i32.store16 12($0), $7
+; NO-SIMD128-NEXT: i32.store16 10($0), $6
; NO-SIMD128-NEXT: i32.store16 8($0), $5
+; NO-SIMD128-NEXT: i32.store16 6($0), $4
; NO-SIMD128-NEXT: i32.store16 4($0), $3
; NO-SIMD128-NEXT: i32.store16 2($0), $2
; NO-SIMD128-NEXT: i32.store16 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 14
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store16 0($pop1), $8
-; NO-SIMD128-NEXT: i32.const $push2=, 12
-; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2
-; NO-SIMD128-NEXT: i32.store16 0($pop3), $7
-; NO-SIMD128-NEXT: i32.const $push4=, 10
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $6
-; NO-SIMD128-NEXT: i32.const $push6=, 6
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $4
; NO-SIMD128-NEXT: return
i16 %x4, i16 %x5, i16 %x6, i16 %x7) {
%t0 = insertelement <8 x i16> undef, i16 %x0, i32 0
@@ -1258,12 +1078,10 @@ define <4 x i32> @splat_v4i32(i32 %x) {
; NO-SIMD128-LABEL: splat_v4i32:
; NO-SIMD128: .functype splat_v4i32 (i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $1
; NO-SIMD128-NEXT: i32.store 8($0), $1
; NO-SIMD128-NEXT: i32.store 4($0), $1
; NO-SIMD128-NEXT: i32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $1
; NO-SIMD128-NEXT: return
%v = insertelement <4 x i32> undef, i32 %x, i32 0
%res = shufflevector <4 x i32> %v, <4 x i32> undef,
@@ -1368,12 +1186,10 @@ define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-LABEL: replace_v4i32:
; NO-SIMD128: .functype replace_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $4
; NO-SIMD128-NEXT: i32.store 8($0), $5
; NO-SIMD128-NEXT: i32.store 4($0), $2
; NO-SIMD128-NEXT: i32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%res = insertelement <4 x i32> %v, i32 %x, i32 2
ret <4 x i32> %res
@@ -1433,12 +1249,10 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) {
; NO-SIMD128-LABEL: replace_zero_v4i32:
; NO-SIMD128: .functype replace_zero_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $4
; NO-SIMD128-NEXT: i32.store 8($0), $3
; NO-SIMD128-NEXT: i32.store 4($0), $2
; NO-SIMD128-NEXT: i32.store 0($0), $5
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%res = insertelement <4 x i32> %v, i32 %x, i32 0
ret <4 x i32> %res
@@ -1454,12 +1268,10 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: shuffle_v4i32:
; NO-SIMD128: .functype shuffle_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $8
; NO-SIMD128-NEXT: i32.store 8($0), $3
; NO-SIMD128-NEXT: i32.store 4($0), $6
; NO-SIMD128-NEXT: i32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $8
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
<4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -1476,12 +1288,10 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v4i32:
; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $2
; NO-SIMD128-NEXT: i32.store 8($0), $2
; NO-SIMD128-NEXT: i32.store 4($0), $2
; NO-SIMD128-NEXT: i32.store 0($0), $2
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
<4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -1501,12 +1311,10 @@ define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; NO-SIMD128-LABEL: build_v4i32:
; NO-SIMD128: .functype build_v4i32 (i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: i32.store 12($0), $4
; NO-SIMD128-NEXT: i32.store 8($0), $3
; NO-SIMD128-NEXT: i32.store 4($0), $2
; NO-SIMD128-NEXT: i32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: i32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%t0 = insertelement <4 x i32> undef, i32 %x0, i32 0
%t1 = insertelement <4 x i32> %t0, i32 %x1, i32 1
@@ -1801,12 +1609,10 @@ define <4 x float> @splat_v4f32(float %x) {
; NO-SIMD128-LABEL: splat_v4f32:
; NO-SIMD128: .functype splat_v4f32 (i32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $1
; NO-SIMD128-NEXT: f32.store 8($0), $1
; NO-SIMD128-NEXT: f32.store 4($0), $1
; NO-SIMD128-NEXT: f32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $1
; NO-SIMD128-NEXT: return
%v = insertelement <4 x float> undef, float %x, i32 0
%res = shufflevector <4 x float> %v, <4 x float> undef,
@@ -1911,12 +1717,10 @@ define <4 x float> @replace_v4f32(<4 x float> %v, float %x) {
; NO-SIMD128-LABEL: replace_v4f32:
; NO-SIMD128: .functype replace_v4f32 (i32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $4
; NO-SIMD128-NEXT: f32.store 8($0), $5
; NO-SIMD128-NEXT: f32.store 4($0), $2
; NO-SIMD128-NEXT: f32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%res = insertelement <4 x float> %v, float %x, i32 2
ret <4 x float> %res
@@ -1976,12 +1780,10 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) {
; NO-SIMD128-LABEL: replace_zero_v4f32:
; NO-SIMD128: .functype replace_zero_v4f32 (i32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $4
; NO-SIMD128-NEXT: f32.store 8($0), $3
; NO-SIMD128-NEXT: f32.store 4($0), $2
; NO-SIMD128-NEXT: f32.store 0($0), $5
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%res = insertelement <4 x float> %v, float %x, i32 0
ret <4 x float> %res
@@ -1997,12 +1799,10 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: shuffle_v4f32:
; NO-SIMD128: .functype shuffle_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $8
; NO-SIMD128-NEXT: f32.store 8($0), $3
; NO-SIMD128-NEXT: f32.store 4($0), $6
; NO-SIMD128-NEXT: f32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $8
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x float> %x, <4 x float> %y,
<4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -2019,12 +1819,10 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v4f32:
; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $2
; NO-SIMD128-NEXT: f32.store 8($0), $2
; NO-SIMD128-NEXT: f32.store 4($0), $2
; NO-SIMD128-NEXT: f32.store 0($0), $2
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x float> %x, <4 x float> %y,
<4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -2044,12 +1842,10 @@ define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) {
; NO-SIMD128-LABEL: build_v4f32:
; NO-SIMD128: .functype build_v4f32 (i32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.store 12($0), $4
; NO-SIMD128-NEXT: f32.store 8($0), $3
; NO-SIMD128-NEXT: f32.store 4($0), $2
; NO-SIMD128-NEXT: f32.store 0($0), $1
-; NO-SIMD128-NEXT: i32.const $push0=, 12
-; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0
-; NO-SIMD128-NEXT: f32.store 0($pop1), $4
; NO-SIMD128-NEXT: return
%t0 = insertelement <4 x float> undef, float %x0, i32 0
%t1 = insertelement <4 x float> %t0, float %x1, i32 1
diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 609be3b..50e736a 100644
--- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
; Check that the shr(shl X, 56), 48) is not mistakenly turned into
@@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
-entry:
; CHECK-LABEL: foo:
-; CHECK: movsbq %dil, %rax
-; CHECK: shlq $8, %rax
-; CHECK: orq $1, %rax
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsbq %dil, %rax
+; CHECK-NEXT: shlq $8, %rax
+; CHECK-NEXT: incq %rax
+; CHECK-NEXT: retq
+entry:
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/AppendingLinkage.ll b/llvm/test/CodeGen/X86/AppendingLinkage.ll
index 83bfbe8..ace5d19 100644
--- a/llvm/test/CodeGen/X86/AppendingLinkage.ll
+++ b/llvm/test/CodeGen/X86/AppendingLinkage.ll
@@ -1,4 +1,4 @@
; RUN: not --crash llc < %s -mtriple=i686-- 2>&1 | FileCheck %s
-; CHECK: unknown special variable
+; CHECK: unknown special variable with appending linkage
@foo = appending constant [1 x i32 ]zeroinitializer
diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll
index 7a8ddf5..cb2d426 100644
--- a/llvm/test/CodeGen/X86/combine-pavg.ll
+++ b/llvm/test/CodeGen/X86/combine-pavg.ll
@@ -84,25 +84,22 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16
define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pavgw_demandedelts:
; SSE: # %bb.0:
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
; SSE-NEXT: pavgw %xmm1, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_pavgw_demandedelts:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13]
; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_pavgw_demandedelts:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
%s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
index 548cf24..13c9585 100644
--- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
+++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir
@@ -869,13 +869,13 @@ body: |
$ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24
; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24
$ymm0 = VSHUFPSZ256rri $ymm0, $ymm1, -24
- ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $ymm0 = VROUNDPDYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, implicit $mxcsr
+ ; CHECK: $ymm0 = VROUNDPDYri $ymm0, 15, implicit $mxcsr
$ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr
- ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $ymm0 = VROUNDPSYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr
+ ; CHECK: $ymm0 = VROUNDPSYri $ymm0, 15, implicit $mxcsr
$ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr
; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32
$ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228
@@ -1751,13 +1751,13 @@ body: |
$xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1
; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8
$xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1
- ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPDmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPDri $xmm0, 15, implicit $mxcsr
$xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPSmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDPSri $xmm0, 15, implicit $mxcsr
$xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr
RET64
@@ -2308,21 +2308,21 @@ body: |
$xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1
; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1
$xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1
- ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDri $xmm0, $xmm1, 15, implicit $mxcsr
$xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSri $xmm0, $xmm1, 15, implicit $mxcsr
$xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSDri_Int $xmm0, $xmm1, 15, implicit $mxcsr
$xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
$xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr
- ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr
+ ; CHECK: $xmm0 = VROUNDSSri_Int $xmm0, $xmm1, 15, implicit $mxcsr
$xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr
RET64
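; NOTE: the expected VROUND* names now carry explicit operand-kind suffixes
; ("mi"/"ri" rather than "m"/"r"), matching an opcode rename on the compiler
; side; the instructions being checked are otherwise unchanged.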
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index d9ee5f0..ee7f4ae 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -173,16 +173,14 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: vmovdqa (%edx), %xmm0
; X86-NEXT: vpand (%ecx), %xmm0, %xmm0
-; X86-NEXT: vpextrb $6, %xmm0, %ecx
-; X86-NEXT: movb %cl, (%eax)
+; X86-NEXT: vpextrb $6, %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: freeze_extractelement:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0
; X64-NEXT: vpand (%rsi), %xmm0, %xmm0
-; X64-NEXT: vpextrb $6, %xmm0, %eax
-; X64-NEXT: movb %al, (%rdx)
+; X64-NEXT: vpextrb $6, %xmm0, (%rdx)
; X64-NEXT: retq
%i0 = load <16 x i8>, ptr %origin0
%i1 = load <16 x i8>, ptr %origin1
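; vpextrb accepts a memory destination, so the extract-to-register plus movb
; pair collapses into a single store of the extracted byte.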
diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll
index 64d44d9..0123431 100644
--- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll
+++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll
@@ -1,59 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=NUM
; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=SJLJ
-; NUM-COUNT-3: endbr64
-
-;SJLJ: main: # @main
-;SJLJ-NEXT: .Lfunc_begin0:
-;SJLJ-NEXT: # %bb.0: # %entry
-;SJLJ-NEXT: endbr64
-;SJLJ-NEXT: pushq %rbp
-;SJLJ: callq _Unwind_SjLj_Register
-;SJLJ-NEXT: .Ltmp0:
-;SJLJ-NEXT: callq _Z3foov
-;SJLJ-NEXT: .Ltmp1:
-;SJLJ-NEXT: # %bb.1: # %invoke.cont
-;SJLJ-NEXT: movl
-;SJLJ-NEXT: .LBB0_7: # %return
-;SJLJ: callq _Unwind_SjLj_Unregister
-;SJLJ: retq
-;SJLJ-NEXT: .LBB0_9:
-;SJLJ-NEXT: endbr64
-;SJLJ-NEXT: movl
-;SJLJ-NEXT: cmpl
-;SJLJ-NEXT: jb .LBB0_10
-;SJLJ-NEXT: # %bb.11:
-;SJLJ-NEXT: ud2
-;SJLJ-NEXT: .LBB0_10:
-;SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx
-;SJLJ-NEXT: jmpq *(%rcx,%rax,8)
-;SJLJ-NEXT: .LBB0_2: # %lpad
-;SJLJ-NEXT: .Ltmp2:
-;SJLJ-NEXT: endbr64
-;SJLJ: jne .LBB0_4
-;SJLJ-NEXT: # %bb.3: # %catch3
-;SJLJ: callq __cxa_begin_catch
-;SJLJ: jmp .LBB0_6
-;SJLJ-NEXT: .LBB0_4: # %catch.fallthrough
-;SJLJ-NEXT: cmpl
-;SJLJ-NEXT: jne .LBB0_8
-;SJLJ-NEXT: # %bb.5: # %catch
-;SJLJ: callq __cxa_begin_catch
-;SJLJ: cmpb
-;SJLJ-NEXT: .LBB0_6: # %return
-;SJLJ: callq __cxa_end_catch
-;SJLJ-NEXT: jmp .LBB0_7
-;SJLJ-NEXT: .LBB0_8: # %eh.resume
-;SJLJ-NEXT: movl
-;SJLJ-NEXT: .Lfunc_end0:
-;SJLJ: .LJTI0_0:
-;SJLJ-NEXT: .quad .LBB0_2
-
@_ZTIi = external dso_local constant ptr
@_ZTIc = external dso_local constant ptr
; Function Attrs: noinline norecurse optnone uwtable
define dso_local i32 @main() #0 personality ptr @__gxx_personality_sj0 {
+; NUM-LABEL: main:
+; NUM: # %bb.0: # %entry
+; NUM-NEXT: endbr64
+; NUM-NEXT: pushq %rbp
+; NUM-NEXT: movq %rsp, %rbp
+; NUM-NEXT: pushq %r15
+; NUM-NEXT: pushq %r14
+; NUM-NEXT: pushq %r13
+; NUM-NEXT: pushq %r12
+; NUM-NEXT: pushq %rbx
+; NUM-NEXT: subq $120, %rsp
+; NUM-NEXT: movl $0, -44(%rbp)
+; NUM-NEXT: movq $__gxx_personality_sj0, -120(%rbp)
+; NUM-NEXT: movq $GCC_except_table0, -112(%rbp)
+; NUM-NEXT: movq %rbp, -104(%rbp)
+; NUM-NEXT: movq %rsp, -88(%rbp)
+; NUM-NEXT: movq $.LBB0_9, -96(%rbp)
+; NUM-NEXT: movl $1, -144(%rbp)
+; NUM-NEXT: leaq -152(%rbp), %rdi
+; NUM-NEXT: callq _Unwind_SjLj_Register@PLT
+; NUM-NEXT: .Ltmp0:
+; NUM-NEXT: callq _Z3foov
+; NUM-NEXT: .Ltmp1:
+; NUM-NEXT: # %bb.1: # %invoke.cont
+; NUM-NEXT: movl $1, -44(%rbp)
+; NUM-NEXT: .LBB0_7: # %return
+; NUM-NEXT: movl -44(%rbp), %ebx
+; NUM-NEXT: leaq -152(%rbp), %rdi
+; NUM-NEXT: callq _Unwind_SjLj_Unregister@PLT
+; NUM-NEXT: movl %ebx, %eax
+; NUM-NEXT: addq $120, %rsp
+; NUM-NEXT: popq %rbx
+; NUM-NEXT: popq %r12
+; NUM-NEXT: popq %r13
+; NUM-NEXT: popq %r14
+; NUM-NEXT: popq %r15
+; NUM-NEXT: popq %rbp
+; NUM-NEXT: retq
+; NUM-NEXT: .LBB0_9:
+; NUM-NEXT: endbr64
+; NUM-NEXT: movl -144(%rbp), %eax
+; NUM-NEXT: cmpl $1, %eax
+; NUM-NEXT: jb .LBB0_10
+; NUM-NEXT: # %bb.11:
+; NUM-NEXT: ud2
+; NUM-NEXT: .LBB0_10:
+; NUM-NEXT: leaq .LJTI0_0(%rip), %rcx
+; NUM-NEXT: jmpq *(%rcx,%rax,8)
+; NUM-NEXT: .LBB0_2: # %lpad
+; NUM-NEXT: .Ltmp2:
+; NUM-NEXT: endbr64
+; NUM-NEXT: movl -140(%rbp), %ecx
+; NUM-NEXT: movl -136(%rbp), %eax
+; NUM-NEXT: movq %rcx, -56(%rbp)
+; NUM-NEXT: movl %eax, -64(%rbp)
+; NUM-NEXT: cmpl $2, %eax
+; NUM-NEXT: jne .LBB0_4
+; NUM-NEXT: # %bb.3: # %catch3
+; NUM-NEXT: movq -56(%rbp), %rdi
+; NUM-NEXT: movl $-1, -144(%rbp)
+; NUM-NEXT: callq __cxa_begin_catch
+; NUM-NEXT: movl (%rax), %eax
+; NUM-NEXT: movl %eax, -60(%rbp)
+; NUM-NEXT: xorl %ecx, %ecx
+; NUM-NEXT: cmpl $5, %eax
+; NUM-NEXT: jmp .LBB0_6
+; NUM-NEXT: .LBB0_4: # %catch.fallthrough
+; NUM-NEXT: cmpl $1, %eax
+; NUM-NEXT: jne .LBB0_8
+; NUM-NEXT: # %bb.5: # %catch
+; NUM-NEXT: movq -56(%rbp), %rdi
+; NUM-NEXT: movl $-1, -144(%rbp)
+; NUM-NEXT: callq __cxa_begin_catch
+; NUM-NEXT: movzbl (%rax), %eax
+; NUM-NEXT: movb %al, -45(%rbp)
+; NUM-NEXT: xorl %ecx, %ecx
+; NUM-NEXT: cmpb $3, %al
+; NUM-NEXT: .LBB0_6: # %return
+; NUM-NEXT: setne %cl
+; NUM-NEXT: movl %ecx, -44(%rbp)
+; NUM-NEXT: movl $-1, -144(%rbp)
+; NUM-NEXT: callq __cxa_end_catch
+; NUM-NEXT: jmp .LBB0_7
+; NUM-NEXT: .LBB0_8: # %eh.resume
+; NUM-NEXT: movl $-1, -144(%rbp)
+;
+; SJLJ-LABEL: main:
+; SJLJ: # %bb.0: # %entry
+; SJLJ-NEXT: endbr64
+; SJLJ-NEXT: pushq %rbp
+; SJLJ-NEXT: movq %rsp, %rbp
+; SJLJ-NEXT: pushq %r15
+; SJLJ-NEXT: pushq %r14
+; SJLJ-NEXT: pushq %r13
+; SJLJ-NEXT: pushq %r12
+; SJLJ-NEXT: pushq %rbx
+; SJLJ-NEXT: subq $120, %rsp
+; SJLJ-NEXT: movl $0, -44(%rbp)
+; SJLJ-NEXT: movq $__gxx_personality_sj0, -120(%rbp)
+; SJLJ-NEXT: movq $GCC_except_table0, -112(%rbp)
+; SJLJ-NEXT: movq %rbp, -104(%rbp)
+; SJLJ-NEXT: movq %rsp, -88(%rbp)
+; SJLJ-NEXT: movq $.LBB0_9, -96(%rbp)
+; SJLJ-NEXT: movl $1, -144(%rbp)
+; SJLJ-NEXT: leaq -152(%rbp), %rdi
+; SJLJ-NEXT: callq _Unwind_SjLj_Register@PLT
+; SJLJ-NEXT: .Ltmp0:
+; SJLJ-NEXT: callq _Z3foov
+; SJLJ-NEXT: .Ltmp1:
+; SJLJ-NEXT: # %bb.1: # %invoke.cont
+; SJLJ-NEXT: movl $1, -44(%rbp)
+; SJLJ-NEXT: .LBB0_7: # %return
+; SJLJ-NEXT: movl -44(%rbp), %ebx
+; SJLJ-NEXT: leaq -152(%rbp), %rdi
+; SJLJ-NEXT: callq _Unwind_SjLj_Unregister@PLT
+; SJLJ-NEXT: movl %ebx, %eax
+; SJLJ-NEXT: addq $120, %rsp
+; SJLJ-NEXT: popq %rbx
+; SJLJ-NEXT: popq %r12
+; SJLJ-NEXT: popq %r13
+; SJLJ-NEXT: popq %r14
+; SJLJ-NEXT: popq %r15
+; SJLJ-NEXT: popq %rbp
+; SJLJ-NEXT: retq
+; SJLJ-NEXT: .LBB0_9:
+; SJLJ-NEXT: endbr64
+; SJLJ-NEXT: movl -144(%rbp), %eax
+; SJLJ-NEXT: cmpl $1, %eax
+; SJLJ-NEXT: jb .LBB0_10
+; SJLJ-NEXT: # %bb.11:
+; SJLJ-NEXT: ud2
+; SJLJ-NEXT: .LBB0_10:
+; SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx
+; SJLJ-NEXT: jmpq *(%rcx,%rax,8)
+; SJLJ-NEXT: .LBB0_2: # %lpad
+; SJLJ-NEXT: .Ltmp2:
+; SJLJ-NEXT: endbr64
+; SJLJ-NEXT: movl -140(%rbp), %ecx
+; SJLJ-NEXT: movl -136(%rbp), %eax
+; SJLJ-NEXT: movq %rcx, -56(%rbp)
+; SJLJ-NEXT: movl %eax, -64(%rbp)
+; SJLJ-NEXT: cmpl $2, %eax
+; SJLJ-NEXT: jne .LBB0_4
+; SJLJ-NEXT: # %bb.3: # %catch3
+; SJLJ-NEXT: movq -56(%rbp), %rdi
+; SJLJ-NEXT: movl $-1, -144(%rbp)
+; SJLJ-NEXT: callq __cxa_begin_catch
+; SJLJ-NEXT: movl (%rax), %eax
+; SJLJ-NEXT: movl %eax, -60(%rbp)
+; SJLJ-NEXT: xorl %ecx, %ecx
+; SJLJ-NEXT: cmpl $5, %eax
+; SJLJ-NEXT: jmp .LBB0_6
+; SJLJ-NEXT: .LBB0_4: # %catch.fallthrough
+; SJLJ-NEXT: cmpl $1, %eax
+; SJLJ-NEXT: jne .LBB0_8
+; SJLJ-NEXT: # %bb.5: # %catch
+; SJLJ-NEXT: movq -56(%rbp), %rdi
+; SJLJ-NEXT: movl $-1, -144(%rbp)
+; SJLJ-NEXT: callq __cxa_begin_catch
+; SJLJ-NEXT: movzbl (%rax), %eax
+; SJLJ-NEXT: movb %al, -45(%rbp)
+; SJLJ-NEXT: xorl %ecx, %ecx
+; SJLJ-NEXT: cmpb $3, %al
+; SJLJ-NEXT: .LBB0_6: # %return
+; SJLJ-NEXT: setne %cl
+; SJLJ-NEXT: movl %ecx, -44(%rbp)
+; SJLJ-NEXT: movl $-1, -144(%rbp)
+; SJLJ-NEXT: callq __cxa_end_catch
+; SJLJ-NEXT: jmp .LBB0_7
+; SJLJ-NEXT: .LBB0_8: # %eh.resume
+; SJLJ-NEXT: movl $-1, -144(%rbp)
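+; The old hand-written block used a NUM-COUNT-3 check for endbr64; the
+; autogenerated NUM body above now lists its three endbr64 instructions
+; explicitly, so the count check is no longer needed.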
entry:
%retval = alloca i32, align 4
%exn.slot = alloca ptr
diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll
index 8fa7ce0..eb5d172 100644
--- a/llvm/test/CodeGen/X86/load-local-v3i129.ll
+++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll
@@ -12,7 +12,7 @@ define void @_start() nounwind {
; FAST-SHLD-NEXT: shrq $2, %rcx
; FAST-SHLD-NEXT: shldq $2, %rdx, %rcx
; FAST-SHLD-NEXT: andq $-4, %rax
-; FAST-SHLD-NEXT: orq $1, %rax
+; FAST-SHLD-NEXT: incq %rax
; FAST-SHLD-NEXT: movq %rax, -40(%rsp)
; FAST-SHLD-NEXT: movq %rcx, -32(%rsp)
; FAST-SHLD-NEXT: orq $-2, -56(%rsp)
@@ -23,7 +23,7 @@ define void @_start() nounwind {
; SLOW-SHLD: # %bb.0: # %Entry
; SLOW-SHLD-NEXT: movq -40(%rsp), %rax
; SLOW-SHLD-NEXT: andq $-4, %rax
-; SLOW-SHLD-NEXT: orq $1, %rax
+; SLOW-SHLD-NEXT: incq %rax
; SLOW-SHLD-NEXT: movq %rax, -40(%rsp)
; SLOW-SHLD-NEXT: orq $-2, -56(%rsp)
; SLOW-SHLD-NEXT: movq $-1, -48(%rsp)
diff --git a/llvm/test/CodeGen/X86/pr23664.ll b/llvm/test/CodeGen/X86/pr23664.ll
index 453e5db..8179602 100644
--- a/llvm/test/CodeGen/X86/pr23664.ll
+++ b/llvm/test/CodeGen/X86/pr23664.ll
@@ -6,7 +6,7 @@ define i2 @f(i32 %arg) {
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal (%rdi,%rdi), %eax
-; CHECK-NEXT: orb $1, %al
+; CHECK-NEXT: incb %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%trunc = trunc i32 %arg to i1
diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll
new file mode 100644
index 0000000..32c7e82
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll
@@ -0,0 +1,2213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL
+
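+; The nsw and nuw variants of each truncation below are expected to produce
+; identical code: the no-wrap flags are not yet used to pick a cheaper
+; lowering, and these tests pin the current baseline in case they are
+; exploited later.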
+define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) {
+; SSE-LABEL: trunc8i64_8i32_nsw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT: movaps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i32_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw:
+; AVX2-FAST-ALL: # %bb.0: # %entry
+; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw:
+; AVX2-FAST-PERLANE: # %bb.0: # %entry
+; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i32_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i64> %a to <8 x i32>
+ ret <8 x i32> %0
+}
+
+define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) {
+; SSE-LABEL: trunc8i64_8i32_nuw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT: movaps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i32_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw:
+; AVX2-FAST-ALL: # %bb.0: # %entry
+; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw:
+; AVX2-FAST-PERLANE: # %bb.0: # %entry
+; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i32_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i64> %a to <8 x i32>
+ ret <8 x i32> %0
+}
+
+define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) {
+; SSE2-SSSE3-LABEL: trunc8i64_8i16_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm2
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm0
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i64_8i16_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i16_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i16_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i16_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i64> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+
+define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) {
+; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm2
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm0
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i64_8i16_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i16_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i16_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i16_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i64> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+
+define void @trunc8i64_8i8_nsw(<8 x i64> %a) {
+; SSE2-SSSE3-LABEL: trunc8i64_8i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i64_8i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255]
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm0
+; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i8_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqb %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i64> %a to <8 x i8>
+ store <8 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc8i64_8i8_nuw(<8 x i64> %a) {
+; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i64_8i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255]
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm0
+; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc8i64_8i8_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovqb %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i64> %a to <8 x i8>
+ store <8 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) {
+; SSE2-LABEL: trunc8i32_8i16_nsw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i32_8i16_nsw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i16_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i16_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i16_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i32_8i16_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i32_8i16_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i16_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i32_8i16_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i32> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+
+define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) {
+; SSE2-LABEL: trunc8i32_8i16_nuw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i32_8i16_nuw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i16_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i16_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i16_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i32_8i16_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i32_8i16_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i16_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i32_8i16_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i32> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+
+define void @trunc8i32_8i8_nsw(<8 x i32> %a) {
+; SSE2-SSSE3-LABEL: trunc8i32_8i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm0
+; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i32_8i8_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i32_8i8_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i8_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i32_8i8_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i32> %a to <8 x i8>
+ store <8 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc8i32_8i8_nuw(<8 x i32> %a) {
+; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: movq %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm0
+; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i32_8i8_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i32_8i8_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i8_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i32_8i8_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i32> %a to <8 x i8>
+ store <8 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i32_16i16_nsw(<16 x i32> %a) {
+; SSE2-LABEL: trunc16i32_16i16_nsw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: pslld $16, %xmm3
+; SSE2-NEXT: psrad $16, %xmm3
+; SSE2-NEXT: pslld $16, %xmm2
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc16i32_16i16_nsw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm4, %xmm1
+; SSSE3-NEXT: pshufb %xmm4, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufb %xmm4, %xmm3
+; SSSE3-NEXT: pshufb %xmm4, %xmm2
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i32_16i16_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i16_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i16_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc16i32_16i16_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nsw <16 x i32> %a to <16 x i16>
+ store <16 x i16> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i32_16i16_nuw(<16 x i32> %a) {
+; SSE2-LABEL: trunc16i32_16i16_nuw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: pslld $16, %xmm3
+; SSE2-NEXT: psrad $16, %xmm3
+; SSE2-NEXT: pslld $16, %xmm2
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc16i32_16i16_nuw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm4, %xmm1
+; SSSE3-NEXT: pshufb %xmm4, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufb %xmm4, %xmm3
+; SSSE3-NEXT: pshufb %xmm4, %xmm2
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i32_16i16_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i16_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i16_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc16i32_16i16_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <16 x i32> %a to <16 x i16>
+ store <16 x i16> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i32_16i8_nsw(<16 x i32> %a) {
+; SSE2-SSSE3-LABEL: trunc16i32_16i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i32_16i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc16i32_16i8_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+  %0 = trunc nsw <16 x i32> %a to <16 x i8>
+ store <16 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i32_16i8_nuw(<16 x i32> %a) {
+; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i32_16i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc16i32_16i8_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <16 x i32> %a to <16 x i8>
+ store <16 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i16_16i8_nsw(<16 x i16> %a) {
+; SSE2-SSSE3-LABEL: trunc16i16_16i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i16_16i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i16_16i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i16_16i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc16i16_16i8_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc16i16_16i8_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc16i16_16i8_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc16i16_16i8_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <16 x i16> %a to <16 x i8>
+ store <16 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc16i16_16i8_nuw(<16 x i16> %a) {
+; SSE2-SSSE3-LABEL: trunc16i16_16i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i16_16i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i16_16i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i16_16i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc16i16_16i8_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc16i16_16i8_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc16i16_16i8_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc16i16_16i8_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <16 x i16> %a to <16 x i8>
+ store <16 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc32i16_32i8_nsw(<32 x i16> %a) {
+; SSE2-SSSE3-LABEL: trunc32i16_32i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc32i16_32i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc32i16_32i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc32i16_32i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc32i16_32i8_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc32i16_32i8_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc32i16_32i8_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc32i16_32i8_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <32 x i16> %a to <32 x i8>
+ store <32 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define void @trunc32i16_32i8_nuw(<32 x i16> %a) {
+; SSE2-SSSE3-LABEL: trunc32i16_32i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc32i16_32i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc32i16_32i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc32i16_32i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc32i16_32i8_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc32i16_32i8_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm1, (%rax)
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc32i16_32i8_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc32i16_32i8_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <32 x i16> %a to <32 x i8>
+ store <32 x i8> %0, ptr undef, align 4
+ ret void
+}
+
+define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) {
+; SSE-LABEL: trunc2x4i64_8i32_nsw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT: movaps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i32_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw:
+; AVX2-FAST-ALL: # %bb.0: # %entry
+; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw:
+; AVX2-FAST-PERLANE: # %bb.0: # %entry
+; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512-LABEL: trunc2x4i64_8i32_nsw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nsw <4 x i64> %a to <4 x i32>
+ %1 = trunc nsw <4 x i64> %b to <4 x i32>
+ %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %2
+}
+
+define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) {
+; SSE-LABEL: trunc2x4i64_8i32_nuw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT: movaps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i32_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw:
+; AVX2-SLOW: # %bb.0: # %entry
+; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw:
+; AVX2-FAST-ALL: # %bb.0: # %entry
+; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
+; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-ALL-NEXT: retq
+;
+; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw:
+; AVX2-FAST-PERLANE: # %bb.0: # %entry
+; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
+; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX2-FAST-PERLANE-NEXT: retq
+;
+; AVX512-LABEL: trunc2x4i64_8i32_nuw:
+; AVX512: # %bb.0: # %entry
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc nuw <4 x i64> %a to <4 x i32>
+ %1 = trunc nuw <4 x i64> %b to <4 x i32>
+ %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm0
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm2
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i64_8i16_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i16_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x4i64_8i16_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x4i64_8i16_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x4i64_8i16_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i64_8i16_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <4 x i64> %a to <4 x i16>
+ %1 = trunc nsw <4 x i64> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm0
+; SSE2-SSSE3-NEXT: psrad $16, %xmm0
+; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE2-SSSE3-NEXT: pslld $16, %xmm2
+; SSE2-SSSE3-NEXT: psrad $16, %xmm2
+; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i64_8i16_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i16_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x4i64_8i16_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x4i64_8i16_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x4i64_8i16_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i64_8i16_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1
+; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <4 x i64> %a to <4 x i16>
+ %1 = trunc nuw <4 x i64> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) {
+; SSE-LABEL: trunc2x2i64_4i32_nsw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: trunc2x2i64_4i32_nsw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x2i64_4i32_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x2i64_4i32_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x2i64_4i32_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <2 x i64> %a to <2 x i32>
+ %1 = trunc nsw <2 x i64> %b to <2 x i32>
+ %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) {
+; SSE-LABEL: trunc2x2i64_4i32_nuw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: trunc2x2i64_4i32_nuw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x2i64_4i32_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x2i64_4i32_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x2i64_4i32_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <2 x i64> %a to <2 x i32>
+ %1 = trunc nuw <2 x i64> %b to <2 x i32>
+ %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: trunc2x4i32_8i16_nsw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x4i32_8i16_nsw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i32_8i16_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc2x4i32_8i16_nsw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x4i32_8i16_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x4i32_8i16_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i32_8i16_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x4i32_8i16_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <4 x i32> %a to <4 x i16>
+ %1 = trunc nsw <4 x i32> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: trunc2x4i32_8i16_nuw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x4i32_8i16_nuw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i32_8i16_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc2x4i32_8i16_nuw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x4i32_8i16_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x4i32_8i16_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT: vzeroupper
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i32_8i16_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x4i32_8i16_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <4 x i32> %a to <4 x i16>
+ %1 = trunc nuw <4 x i32> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4
+; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x16i16_32i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: packuswb %xmm3, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x16i16_32i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x16i16_32i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x16i16_32i8_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x16i16_32i8_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x16i16_32i8_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <16 x i16> %a to <16 x i8>
+ %1 = trunc nsw <16 x i16> %b to <16 x i8>
+ %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %2
+}
+
+define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4
+; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4
+; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x16i16_32i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm4, %xmm1
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: packuswb %xmm3, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x16i16_32i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x16i16_32i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x16i16_32i8_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x16i16_32i8_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
+; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x16i16_32i8_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <16 x i16> %a to <16 x i8>
+ %1 = trunc nuw <16 x i16> %b to <16 x i8>
+ %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %2
+}
+
+define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nsw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x8i16_16i8_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x8i16_16i8_nsw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x8i16_16i8_nsw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x8i16_16i8_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x8i16_16i8_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x8i16_16i8_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x8i16_16i8_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i16> %a to <8 x i8>
+ %1 = trunc nsw <8 x i16> %b to <8 x i8>
+ %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nuw:
+; SSE2-SSSE3: # %bb.0: # %entry
+; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x8i16_16i8_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: packuswb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x8i16_16i8_nuw:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x8i16_16i8_nuw:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc2x8i16_16i8_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc2x8i16_16i8_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x8i16_16i8_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc2x8i16_16i8_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0
+; AVX512BWVL-NEXT: vzeroupper
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i16> %a to <8 x i8>
+ %1 = trunc nuw <8 x i16> %b to <8 x i8>
+ %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %2
+}
+
+define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) {
+; SSE2-LABEL: trunc8i16_i64_nsw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i16_i64_nsw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movq %xmm0, %rax
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i16_i64_nsw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movq %xmm0, %rax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc8i16_i64_nsw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i16_i64_nsw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i16_i64_nsw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i16_i64_nsw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i16_i64_nsw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0
+; AVX512BWVL-NEXT: vmovq %xmm0, %rax
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nsw <8 x i16> %inval to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to i64
+ ret i64 %1
+}
+
+define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) {
+; SSE2-LABEL: trunc8i16_i64_nuw:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: movq %xmm0, %rax
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i16_i64_nuw:
+; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movq %xmm0, %rax
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i16_i64_nuw:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movq %xmm0, %rax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc8i16_i64_nuw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: trunc8i16_i64_nuw:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc8i16_i64_nuw:
+; AVX512VL: # %bb.0: # %entry
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i16_i64_nuw:
+; AVX512BW: # %bb.0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc8i16_i64_nuw:
+; AVX512BWVL: # %bb.0: # %entry
+; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0
+; AVX512BWVL-NEXT: vmovq %xmm0, %rax
+; AVX512BWVL-NEXT: retq
+entry:
+ %0 = trunc nuw <8 x i16> %inval to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to i64
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
index 691ca40..f7a27a5 100644
--- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
+++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll
@@ -65,6 +65,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
@@ -74,6 +75,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movb %al, (%rdx)
@@ -81,14 +83,15 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
;
; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X86-NO-BMI2: # %bb.0:
-; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-BMI2-NEXT: movzwl (%eax), %eax
+; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
+; X86-NO-BMI2-NEXT: movzwl %dx, %edx
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NO-BMI2-NEXT: shrl %cl, %eax
-; X86-NO-BMI2-NEXT: movb %al, (%edx)
+; X86-NO-BMI2-NEXT: shrl %cl, %edx
+; X86-NO-BMI2-NEXT: movb %dl, (%eax)
; X86-NO-BMI2-NEXT: retl
;
; X86-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half:
@@ -97,6 +100,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
+; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movb %cl, (%eax)
@@ -119,6 +123,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-NO-BMI2: # %bb.0:
; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-NO-BMI2-NEXT: movzwl %ax, %eax
; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NO-BMI2-NEXT: shrl %cl, %eax
@@ -128,6 +133,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movzwl (%rdi), %eax
+; X64-BMI2-NEXT: movzwl %ax, %eax
; X64-BMI2-NEXT: shll $3, %esi
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movw %ax, (%rdx)
@@ -139,6 +145,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NO-BMI2-NEXT: movzwl (%edx), %edx
+; X86-NO-BMI2-NEXT: movzwl %dx, %edx
; X86-NO-BMI2-NEXT: shll $3, %ecx
; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NO-BMI2-NEXT: shrl %cl, %edx
@@ -151,6 +158,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movzwl (%edx), %edx
+; X86-BMI2-NEXT: movzwl %dx, %edx
; X86-BMI2-NEXT: shll $3, %ecx
; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx
; X86-BMI2-NEXT: movw %cx, (%eax)